diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 799e102d56174..920cf78485876 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -411,6 +411,12 @@ def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode", "Has VGPR mode register indexing" >; +def FeatureScalarDwordx3Loads : SubtargetFeature<"scalar-dwordx3-loads", + "HasScalarDwordx3Loads", + "true", + "Has 96-bit scalar load instructions" +>; + def FeatureScalarStores : SubtargetFeature<"scalar-stores", "HasScalarStores", "true", @@ -1462,7 +1468,8 @@ def FeatureISAVersion12 : FeatureSet< FeatureVcmpxPermlaneHazard, FeatureSALUFloatInsts, FeatureVGPRSingleUseHintInsts, - FeatureMADIntraFwdBug]>; + FeatureMADIntraFwdBug, + FeatureScalarDwordx3Loads]>; //===----------------------------------------------------------------------===// @@ -2011,6 +2018,8 @@ def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">; def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">; +def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 2f1e1809b6cd7..3b69a37728ea1 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2635,7 +2635,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, if (RegKind == IS_SGPR || RegKind == IS_TTMP) { // SGPR and TTMP registers must be aligned. // Max required alignment is 4 dwords. - AlignSize = std::min(RegWidth / 32, 4u); + AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u); } if (RegNum % AlignSize != 0) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 1cfc5af571c1f..392bc626167cf 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -213,6 +213,7 @@ DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32) DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32) DECODE_OPERAND_REG_7(SReg_64, OPW64) DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64) +DECODE_OPERAND_REG_7(SReg_96, OPW96) DECODE_OPERAND_REG_7(SReg_128, OPW128) DECODE_OPERAND_REG_7(SReg_256, OPW256) DECODE_OPERAND_REG_7(SReg_512, OPW512) @@ -1239,6 +1240,8 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID, case AMDGPU::TTMP_64RegClassID: shift = 1; break; + case AMDGPU::SGPR_96RegClassID: + case AMDGPU::TTMP_96RegClassID: case AMDGPU::SGPR_128RegClassID: case AMDGPU::TTMP_128RegClassID: // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 8e1350be8b45f..e59b74835b42a 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -119,6 +119,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasFmaMixInsts = false; bool HasMovrel = false; bool HasVGPRIndexMode = false; + bool HasScalarDwordx3Loads = false; bool HasScalarStores = false; bool HasScalarAtomics = false; bool HasSDWAOmod = false; @@ -886,6 +887,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return getGeneration() >= VOLCANIC_ISLANDS; } + bool hasScalarDwordx3Loads() const { return HasScalarDwordx3Loads; } + bool hasScalarStores() const { return HasScalarStores; } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 7ea2280c474b0..981da13fe0895 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -414,7 +414,7 @@ def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, // SGPR 64-bit registers def SGPR_64Regs : SIRegisterTuples.ret, SGPR_32, 105, 2, 2, "s">; -// SGPR 96-bit registers. No operations use these, but for symmetry with 96-bit VGPRs. +// SGPR 96-bit registers. def SGPR_96Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 3, "s">; // SGPR 128-bit registers diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index c18846483cf95..d24bfd535d4dd 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -74,7 +74,7 @@ class SM_Real bits<7> sdst; bits<32> offset; bits<8> soffset; - bits<5> cpol; + bits<5> cpol; } class OffsetMode; defm S_LOAD_DWORDX2 : SM_Pseudo_Loads ; +let SubtargetPredicate = HasScalarDwordx3Loads in + defm S_LOAD_DWORDX3 : SM_Pseudo_Loads ; defm S_LOAD_DWORDX4 : SM_Pseudo_Loads ; defm S_LOAD_DWORDX8 : SM_Pseudo_Loads ; defm S_LOAD_DWORDX16 : SM_Pseudo_Loads ; @@ -309,6 +311,8 @@ defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads ; // FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on // SI/CI, bit disallowed for SMEM on VI. defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads ; +let SubtargetPredicate = HasScalarDwordx3Loads in + defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads ; defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads ; defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads ; defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads ; @@ -1179,7 +1183,7 @@ def SMInfoTable : GenericTable { class SMEM_Real_gfx11 op, SM_Pseudo ps, string opName = ps.Mnemonic> : SMEM_Real_10Plus_common { - let AssemblerPredicate = isGFX11Plus; + let AssemblerPredicate = isGFX11Only; let DecoderNamespace = "GFX11"; let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0); let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0); @@ -1235,19 +1239,30 @@ defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23>; // GFX12. //===----------------------------------------------------------------------===// -class SMEM_Real_gfx12 op, SM_Pseudo ps, string opName = ps.Mnemonic> : - SMEM_Real_10Plus_common { +class SMEM_Real_gfx12Plus op, SM_Pseudo ps, string opName, + int subtarget, RegisterWithSubRegs sgpr_null> : + SM_Real, SIMCInstr, Enc64 { + + let Inst{18-13} = op; + let Inst{31-26} = 0x3d; + + let Inst{55-32} = !if(ps.has_offset, offset{23-0}, !if(ps.has_soffset, 0, ?)); + let Inst{63-57} = !if(ps.has_soffset, soffset{6-0}, + !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?)); +} + +class SMEM_Real_gfx12 op, SM_Pseudo ps, string opName = ps.Mnemonic> : + SMEM_Real_gfx12Plus { let AssemblerPredicate = isGFX12Plus; let DecoderNamespace = "GFX12"; - let Inst{18-13} = op{5-0}; - let Inst{19} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0); - let Inst{24-20} = ?; // TODO-GFX12: Add new bits {24-20}: TH, Scope, NV - let Inst{25} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0); - let Inst{55-32} = offset{23-0}; + + let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); + let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); } -class SMEM_Real_Prefetch_gfx12 op, SM_Pseudo ps> : SMEM_Real_gfx12 { +class SMEM_Real_Prefetch_gfx12 op, SM_Pseudo ps> : + SMEM_Real_gfx12 { bits<7> sdata; // Only 5 bits of sdata are supported. let sdst = ?; @@ -1255,8 +1270,48 @@ class SMEM_Real_Prefetch_gfx12 op, SM_Pseudo ps> : SMEM_Real_gfx12 op, string ps, string opName, OffsetMode offsets> : + SMEM_Real_gfx12(ps # offsets.Variant), opName> { + RegisterClass BaseClass = !cast(ps # offsets.Variant).BaseClass; + let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol)); + + let Inst{22-21} = cpol{4-3}; // scope + let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported +} + +multiclass SM_Real_Loads_gfx12 op, string ps = NAME> { + defvar opName = !tolower(NAME); + def _IMM_gfx12 : SMEM_Real_Load_gfx12; + def _SGPR_IMM_gfx12 : SMEM_Real_Load_gfx12; +} + +defm S_LOAD_B32 : SM_Real_Loads_gfx12<0x00, "S_LOAD_DWORD">; +defm S_LOAD_B64 : SM_Real_Loads_gfx12<0x01, "S_LOAD_DWORDX2">; +defm S_LOAD_B96 : SM_Real_Loads_gfx12<0x05, "S_LOAD_DWORDX3">; +defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">; +defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">; +defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">; + +defm S_BUFFER_LOAD_B32 : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">; +defm S_BUFFER_LOAD_B64 : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">; +defm S_BUFFER_LOAD_B96 : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">; +defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">; +defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">; +defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">; + +def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>; + def S_PREFETCH_INST_gfx12 : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>; def S_PREFETCH_INST_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x25, S_PREFETCH_INST_PC_REL>; def S_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x26, S_PREFETCH_DATA>; def S_BUFFER_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x27, S_BUFFER_PREFETCH_DATA>; def S_PREFETCH_DATA_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x28, S_PREFETCH_DATA_PC_REL>; + +multiclass SMEM_Real_Probe_gfx12 op> { + defvar ps = NAME; + def _IMM_gfx12 : SMEM_Real_Prefetch_gfx12(ps#_IMM)>; + def _SGPR_IMM_gfx12 : SMEM_Real_Prefetch_gfx12(ps#_SGPR_IMM)>; +} + +defm S_ATC_PROBE : SMEM_Real_Probe_gfx12<0x22>; +defm S_ATC_PROBE_BUFFER : SMEM_Real_Probe_gfx12<0x23>; diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_err.s index 9726dea77f98e..088ee416692b8 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_err.s @@ -158,3 +158,9 @@ scratch_store_b128 off, v[2:5], s0 offset:8000000 flat_atomic_add_f32 v1, v[0:1], v2 offset:-1 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: expected a 12-bit unsigned offset + +s_load_b96 s[20:22], s[2:3], s0 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +s_buffer_load_b96 s[20:22], s[4:7], s0 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s index ed7ad5bb0c4e8..1566b9c04e349 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s @@ -33,3 +33,813 @@ s_buffer_prefetch_data s[20:23], 100, s10, 7 s_buffer_prefetch_data s[20:23], 100, null, 7 // GFX12: s_buffer_prefetch_data s[20:23], 0x64, null, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8] + +s_load_b32 s5, s[2:3], s0 +// GFX12: s_load_b32 s5, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 s101, s[2:3], s0 +// GFX12: s_load_b32 s101, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x19,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 vcc_lo, s[2:3], s0 +// GFX12: s_load_b32 vcc_lo, s[2:3], s0 offset:0x0 ; encoding: [0x81,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 vcc_hi, s[2:3], s0 +// GFX12: s_load_b32 vcc_hi, s[2:3], s0 offset:0x0 ; encoding: [0xc1,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[4:5], s0 +// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 ; encoding: [0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[100:101], s0 +// GFX12: s_load_b32 s5, s[100:101], s0 offset:0x0 ; encoding: [0x72,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 s5, vcc, s0 +// GFX12: s_load_b32 s5, vcc, s0 offset:0x0 ; encoding: [0x75,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[2:3], s101 +// GFX12: s_load_b32 s5, s[2:3], s101 offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xca] + +s_load_b32 s5, s[2:3], vcc_lo +// GFX12: s_load_b32 s5, s[2:3], vcc_lo offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b32 s5, s[2:3], vcc_hi +// GFX12: s_load_b32 s5, s[2:3], vcc_hi offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd6] + +s_load_b32 s5, s[2:3], m0 +// GFX12: s_load_b32 s5, s[2:3], m0 offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa] + +s_load_b32 s5, s[2:3], 0x0 +// GFX12: s_load_b32 s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8] + +s_load_b32 s5, s[2:3], s7 offset:0x12345 +// GFX12: s_load_b32 s5, s[2:3], s7 offset:0x12345 ; encoding: [0x41,0x01,0x00,0xf4,0x45,0x23,0x01,0x0e] + +s_load_b64 s[10:11], s[2:3], s0 +// GFX12: s_load_b64 s[10:11], s[2:3], s0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b64 s[12:13], s[2:3], s0 +// GFX12: s_load_b64 s[12:13], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x23,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b64 s[100:101], s[2:3], s0 +// GFX12: s_load_b64 s[100:101], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x39,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b64 vcc, s[2:3], s0 +// GFX12: s_load_b64 vcc, s[2:3], s0 offset:0x0 ; encoding: [0x81,0x3a,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b64 s[10:11], s[4:5], s0 +// GFX12: s_load_b64 s[10:11], s[4:5], s0 offset:0x0 ; encoding: [0x82,0x22,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b64 s[10:11], s[100:101], s0 +// GFX12: s_load_b64 s[10:11], s[100:101], s0 offset:0x0 ; encoding: [0xb2,0x22,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b64 s[10:11], vcc, s0 +// GFX12: s_load_b64 s[10:11], vcc, s0 offset:0x0 ; encoding: [0xb5,0x22,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b64 s[10:11], s[2:3], s101 +// GFX12: s_load_b64 s[10:11], s[2:3], s101 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xca] + +s_load_b64 s[10:11], s[2:3], vcc_lo +// GFX12: s_load_b64 s[10:11], s[2:3], vcc_lo offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b64 s[10:11], s[2:3], vcc_hi +// GFX12: s_load_b64 s[10:11], s[2:3], vcc_hi offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd6] + +s_load_b64 s[10:11], s[2:3], m0 +// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xfa] + +s_load_b64 s[10:11], s[2:3], 0x0 +// GFX12: s_load_b64 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xf8] + +s_load_b96 s[20:22], s[2:3], s0 +// GFX12: s_load_b96 s[20:22], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b96 s[24:26], s[2:3], s0 +// GFX12: s_load_b96 s[24:26], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xa6,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b96 s[96:98], s[2:3], s0 +// GFX12: s_load_b96 s[96:98], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xb8,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b96 s[20:22], s[4:5], s0 +// GFX12: s_load_b96 s[20:22], s[4:5], s0 offset:0x0 ; encoding: [0x02,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b96 s[20:22], s[100:101], s0 +// GFX12: s_load_b96 s[20:22], s[100:101], s0 offset:0x0 ; encoding: [0x32,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b96 s[20:22], vcc, s0 +// GFX12: s_load_b96 s[20:22], vcc, s0 offset:0x0 ; encoding: [0x35,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b96 s[20:22], s[2:3], s101 +// GFX12: s_load_b96 s[20:22], s[2:3], s101 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xca] + +s_load_b96 s[20:22], s[2:3], vcc_lo +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b96 s[20:22], s[2:3], vcc_hi +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd6] + +s_load_b96 s[20:22], s[2:3], m0 +// GFX12: s_load_b96 s[20:22], s[2:3], m0 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xfa] + +s_load_b96 s[20:22], s[2:3], 0x0 +// GFX12: s_load_b96 s[20:22], s[2:3], 0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xf8] + +s_load_b128 s[20:23], s[2:3], s0 +// GFX12: s_load_b128 s[20:23], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b128 s[24:27], s[2:3], s0 +// GFX12: s_load_b128 s[24:27], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x46,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b128 s[96:99], s[2:3], s0 +// GFX12: s_load_b128 s[96:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x58,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b128 s[20:23], s[4:5], s0 +// GFX12: s_load_b128 s[20:23], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x45,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b128 s[20:23], s[100:101], s0 +// GFX12: s_load_b128 s[20:23], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x45,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b128 s[20:23], vcc, s0 +// GFX12: s_load_b128 s[20:23], vcc, s0 offset:0x0 ; encoding: [0x35,0x45,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b128 s[20:23], s[2:3], s101 +// GFX12: s_load_b128 s[20:23], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xca] + +s_load_b128 s[20:23], s[2:3], vcc_lo +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b128 s[20:23], s[2:3], vcc_hi +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd6] + +s_load_b128 s[20:23], s[2:3], m0 +// GFX12: s_load_b128 s[20:23], s[2:3], m0 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xfa] + +s_load_b128 s[20:23], s[2:3], 0x0 +// GFX12: s_load_b128 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xf8] + +s_load_b256 s[20:27], s[2:3], s0 +// GFX12: s_load_b256 s[20:27], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b256 s[24:31], s[2:3], s0 +// GFX12: s_load_b256 s[24:31], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x66,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b256 s[92:99], s[2:3], s0 +// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x77,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b256 s[20:27], s[4:5], s0 +// GFX12: s_load_b256 s[20:27], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x65,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b256 s[20:27], s[100:101], s0 +// GFX12: s_load_b256 s[20:27], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x65,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b256 s[20:27], vcc, s0 +// GFX12: s_load_b256 s[20:27], vcc, s0 offset:0x0 ; encoding: [0x35,0x65,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b256 s[20:27], s[2:3], s101 +// GFX12: s_load_b256 s[20:27], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xca] + +s_load_b256 s[20:27], s[2:3], vcc_lo +// GFX12: s_load_b256 s[20:27], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b256 s[20:27], s[2:3], vcc_hi +// GFX12: s_load_b256 s[20:27], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xd6] + +s_load_b256 s[20:27], s[2:3], m0 +// GFX12: s_load_b256 s[20:27], s[2:3], m0 offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xfa] + +s_load_b256 s[20:27], s[2:3], 0x0 +// GFX12: s_load_b256 s[20:27], s[2:3], 0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xf8] + +s_load_b512 s[20:35], s[2:3], s0 +// GFX12: s_load_b512 s[20:35], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b512 s[24:39], s[2:3], s0 +// GFX12: s_load_b512 s[24:39], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x86,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b512 s[84:99], s[2:3], s0 +// GFX12: s_load_b512 s[84:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x95,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[4:5], s0 +// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x85,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[100:101], s0 +// GFX12: s_load_b512 s[20:35], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x85,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], vcc, s0 +// GFX12: s_load_b512 s[20:35], vcc, s0 offset:0x0 ; encoding: [0x35,0x85,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[2:3], s101 +// GFX12: s_load_b512 s[20:35], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xca] + +s_load_b512 s[20:35], s[2:3], vcc_lo +// GFX12: s_load_b512 s[20:35], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b512 s[20:35], s[2:3], vcc_hi +// GFX12: s_load_b512 s[20:35], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xd6] + +s_load_b512 s[20:35], s[2:3], m0 +// GFX12: s_load_b512 s[20:35], s[2:3], m0 offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xfa] + +s_load_b512 s[20:35], s[2:3], 0x0 +// GFX12: s_load_b512 s[20:35], s[2:3], 0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xf8] + +s_buffer_load_b32 s5, s[4:7], s0 +// GFX12: s_buffer_load_b32 s5, s[4:7], s0 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s101, s[4:7], s0 +// GFX12: s_buffer_load_b32 s101, s[4:7], s0 offset:0x0 ; encoding: [0x42,0x19,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 vcc_lo, s[4:7], s0 +// GFX12: s_buffer_load_b32 vcc_lo, s[4:7], s0 offset:0x0 ; encoding: [0x82,0x1a,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 vcc_hi, s[4:7], s0 +// GFX12: s_buffer_load_b32 vcc_hi, s[4:7], s0 offset:0x0 ; encoding: [0xc2,0x1a,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[8:11], s0 +// GFX12: s_buffer_load_b32 s5, s[8:11], s0 offset:0x0 ; encoding: [0x44,0x01,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[96:99], s0 +// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 ; encoding: [0x70,0x01,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[4:7], s101 +// GFX12: s_buffer_load_b32 s5, s[4:7], s101 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xca] + +s_buffer_load_b32 s5, s[4:7], vcc_lo +// GFX12: s_buffer_load_b32 s5, s[4:7], vcc_lo offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b32 s5, s[4:7], vcc_hi +// GFX12: s_buffer_load_b32 s5, s[4:7], vcc_hi offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xd6] + +s_buffer_load_b32 s5, s[4:7], m0 +// GFX12: s_buffer_load_b32 s5, s[4:7], m0 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xfa] + +s_buffer_load_b32 s5, s[4:7], 0x0 +// GFX12: s_buffer_load_b32 s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xf8] + +s_buffer_load_b64 s[10:11], s[4:7], s0 +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[12:13], s[4:7], s0 +// GFX12: s_buffer_load_b64 s[12:13], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x23,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[100:101], s[4:7], s0 +// GFX12: s_buffer_load_b64 s[100:101], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x39,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 vcc, s[4:7], s0 +// GFX12: s_buffer_load_b64 vcc, s[4:7], s0 offset:0x0 ; encoding: [0x82,0x3a,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[8:11], s0 +// GFX12: s_buffer_load_b64 s[10:11], s[8:11], s0 offset:0x0 ; encoding: [0x84,0x22,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[96:99], s0 +// GFX12: s_buffer_load_b64 s[10:11], s[96:99], s0 offset:0x0 ; encoding: [0xb0,0x22,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[4:7], s101 +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s101 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xca] + +s_buffer_load_b64 s[10:11], s[4:7], vcc_lo +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], vcc_lo offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b64 s[10:11], s[4:7], vcc_hi +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], vcc_hi offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xd6] + +s_buffer_load_b64 s[10:11], s[4:7], m0 +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], m0 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xfa] + +s_buffer_load_b64 s[10:11], s[4:7], 0x0 +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xf8] + +s_buffer_load_b96 s[20:22], s[4:7], s0 +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], s0 offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b96 s[24:26], s[4:7], s0 +// GFX12: s_buffer_load_b96 s[24:26], s[4:7], s0 offset:0x0 ; encoding: [0x02,0xa6,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b96 s[96:98], s[4:7], s0 +// GFX12: s_buffer_load_b96 s[96:98], s[4:7], s0 offset:0x0 ; encoding: [0x02,0xb8,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b96 s[20:22], s[8:11], s0 +// GFX12: s_buffer_load_b96 s[20:22], s[8:11], s0 offset:0x0 ; encoding: [0x04,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b96 s[20:22], s[96:99], s0 +// GFX12: s_buffer_load_b96 s[20:22], s[96:99], s0 offset:0x0 ; encoding: [0x30,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b96 s[20:22], s[4:7], s101 +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], s101 offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xca] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_lo +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_hi +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd6] + +s_buffer_load_b96 s[20:22], s[4:7], m0 +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], m0 offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xfa] + +s_buffer_load_b96 s[20:22], s[4:7], 0x0 +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], 0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xf8] + +s_buffer_load_b128 s[20:23], s[4:7], s0 +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b128 s[24:27], s[4:7], s0 +// GFX12: s_buffer_load_b128 s[24:27], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x46,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b128 s[96:99], s[4:7], s0 +// GFX12: s_buffer_load_b128 s[96:99], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x58,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b128 s[20:23], s[8:11], s0 +// GFX12: s_buffer_load_b128 s[20:23], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x45,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b128 s[20:23], s[96:99], s0 +// GFX12: s_buffer_load_b128 s[20:23], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x45,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b128 s[20:23], s[4:7], s101 +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], s101 offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xca] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_lo +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_hi +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd6] + +s_buffer_load_b128 s[20:23], s[4:7], m0 +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], m0 offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xfa] + +s_buffer_load_b128 s[20:23], s[4:7], 0x0 +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xf8] + +s_buffer_load_b256 s[20:27], s[4:7], s0 +// GFX12: s_buffer_load_b256 s[20:27], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[24:31], s[4:7], s0 +// GFX12: s_buffer_load_b256 s[24:31], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x66,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[92:99], s[4:7], s0 +// GFX12: s_buffer_load_b256 s[92:99], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x77,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[8:11], s0 +// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x65,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[96:99], s0 +// GFX12: s_buffer_load_b256 s[20:27], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x65,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[4:7], s101 +// GFX12: s_buffer_load_b256 s[20:27], s[4:7], s101 offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xca] + +s_buffer_load_b256 s[20:27], s[4:7], vcc_lo +// GFX12: s_buffer_load_b256 s[20:27], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b256 s[20:27], s[4:7], vcc_hi +// GFX12: s_buffer_load_b256 s[20:27], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xd6] + +s_buffer_load_b256 s[20:27], s[4:7], m0 +// GFX12: s_buffer_load_b256 s[20:27], s[4:7], m0 offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xfa] + +s_buffer_load_b256 s[20:27], s[4:7], 0x0 +// GFX12: s_buffer_load_b256 s[20:27], s[4:7], 0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xf8] + +s_buffer_load_b512 s[20:35], s[4:7], s0 +// GFX12: s_buffer_load_b512 s[20:35], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[24:39], s[4:7], s0 +// GFX12: s_buffer_load_b512 s[24:39], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x86,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[84:99], s[4:7], s0 +// GFX12: s_buffer_load_b512 s[84:99], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x95,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[8:11], s0 +// GFX12: s_buffer_load_b512 s[20:35], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x85,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[96:99], s0 +// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x85,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[4:7], s101 +// GFX12: s_buffer_load_b512 s[20:35], s[4:7], s101 offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xca] + +s_buffer_load_b512 s[20:35], s[4:7], vcc_lo +// GFX12: s_buffer_load_b512 s[20:35], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b512 s[20:35], s[4:7], vcc_hi +// GFX12: s_buffer_load_b512 s[20:35], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xd6] + +s_buffer_load_b512 s[20:35], s[4:7], m0 +// GFX12: s_buffer_load_b512 s[20:35], s[4:7], m0 offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xfa] + +s_buffer_load_b512 s[20:35], s[4:7], 0x0 +// GFX12: s_buffer_load_b512 s[20:35], s[4:7], 0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xf8] + +s_dcache_inv +// GFX12: s_dcache_inv ; encoding: [0x00,0x20,0x04,0xf4,0x00,0x00,0x00,0x00] + +s_atc_probe 7, s[4:5], s2 +// GFX12: encoding: [0xc2,0x41,0x04,0xf4,0x00,0x00,0x00,0x04] + +s_atc_probe 7, s[4:5], 0x64 +// GFX12: encoding: [0xc2,0x41,0x04,0xf4,0x64,0x00,0x00,0xf8] + +s_atc_probe 7, s[4:5], s9 offset:0x64 +// GFX12: encoding: [0xc2,0x41,0x04,0xf4,0x64,0x00,0x00,0x12] + +s_atc_probe_buffer 7, s[8:11], s2 +// GFX12: encoding: [0xc4,0x61,0x04,0xf4,0x00,0x00,0x00,0x04] + +s_atc_probe_buffer 7, s[8:11], 0x64 +// GFX12: encoding: [0xc4,0x61,0x04,0xf4,0x64,0x00,0x00,0xf8] + +s_atc_probe_buffer 7, s[8:11], s9 offset:0x64 +// GFX12: encoding: [0xc4,0x61,0x04,0xf4,0x64,0x00,0x00,0x12] + +s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_RT +// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 ; encoding: [0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_NT +// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x42,0x01,0x80,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT +// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x42,0x01,0x00,0xf5,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_LU +// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x42,0x01,0x80,0xf5,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_CU +// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 ; encoding: [0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_SE +// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_DEV +// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x42,0x01,0x40,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_SYS +// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x42,0x01,0x60,0xf4,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x42,0x01,0x20,0xf5,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE +// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x42,0x01,0xa0,0xf5,0x00,0x00,0x00,0x00] + +s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS +// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x42,0x01,0x60,0xf5,0x00,0x00,0x00,0x00] + +s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_RT +// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xfa] + +s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_NT +// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x81,0x22,0x80,0xf4,0x00,0x00,0x00,0xfa] + +s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT +// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x81,0x22,0x00,0xf5,0x00,0x00,0x00,0xfa] + +s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_LU +// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x81,0x22,0x80,0xf5,0x00,0x00,0x00,0xfa] + +s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_CU +// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xfa] + +s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_SE +// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_SE ; encoding: [0x81,0x22,0x20,0xf4,0x00,0x00,0x00,0xfa] + +s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_DEV +// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x81,0x22,0x40,0xf4,0x00,0x00,0x00,0xfa] + +s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_SYS +// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x81,0x22,0x60,0xf4,0x00,0x00,0x00,0xfa] + +s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x81,0x22,0x20,0xf5,0x00,0x00,0x00,0xfa] + +s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE +// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x81,0x22,0xa0,0xf5,0x00,0x00,0x00,0xfa] + +s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS +// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x81,0x22,0x60,0xf5,0x00,0x00,0x00,0xfa] + +s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_RT +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_NT +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding: [0x01,0xa5,0x80,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding: [0x01,0xa5,0x00,0xf5,0x00,0x00,0x00,0xd4] + +s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding: [0x01,0xa5,0x80,0xf5,0x00,0x00,0x00,0xd4] + +s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_CU +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SE +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding: [0x01,0xa5,0x20,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_DEV +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding: [0x01,0xa5,0x40,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SYS +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding: [0x01,0xa5,0x60,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0xa5,0x20,0xf5,0x00,0x00,0x00,0xd4] + +s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x01,0xa5,0xa0,0xf5,0x00,0x00,0x00,0xd4] + +s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS +// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x01,0xa5,0x60,0xf5,0x00,0x00,0x00,0xd4] + +s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_RT +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_NT +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding: [0x01,0x45,0x80,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding: [0x01,0x45,0x00,0xf5,0x00,0x00,0x00,0xd4] + +s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding: [0x01,0x45,0x80,0xf5,0x00,0x00,0x00,0xd4] + +s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_CU +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SE +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding: [0x01,0x45,0x20,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_DEV +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding: [0x01,0x45,0x40,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SYS +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding: [0x01,0x45,0x60,0xf4,0x00,0x00,0x00,0xd4] + +s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0x45,0x20,0xf5,0x00,0x00,0x00,0xd4] + +s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x01,0x45,0xa0,0xf5,0x00,0x00,0x00,0xd4] + +s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS +// GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x01,0x45,0x60,0xf5,0x00,0x00,0x00,0xd4] + +s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_RT +// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x77,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_NT +// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x01,0x77,0x80,0xf4,0x00,0x00,0x00,0x00] + +s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT +// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x01,0x77,0x00,0xf5,0x00,0x00,0x00,0x00] + +s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_LU +// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x01,0x77,0x80,0xf5,0x00,0x00,0x00,0x00] + +s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_CU +// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x77,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_SE +// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x01,0x77,0x20,0xf4,0x00,0x00,0x00,0x00] + +s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_DEV +// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x01,0x77,0x40,0xf4,0x00,0x00,0x00,0x00] + +s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_SYS +// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x01,0x77,0x60,0xf4,0x00,0x00,0x00,0x00] + +s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0x77,0x20,0xf5,0x00,0x00,0x00,0x00] + +s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE +// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x01,0x77,0xa0,0xf5,0x00,0x00,0x00,0x00] + +s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS +// GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x01,0x77,0x60,0xf5,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_RT +// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x85,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_NT +// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x02,0x85,0x80,0xf4,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT +// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x02,0x85,0x00,0xf5,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU +// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x02,0x85,0x80,0xf5,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_CU +// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x85,0x00,0xf4,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_SE +// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x02,0x85,0x20,0xf4,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_DEV +// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x02,0x85,0x40,0xf4,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_SYS +// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x02,0x85,0x60,0xf4,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x02,0x85,0x20,0xf5,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE +// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x02,0x85,0xa0,0xf5,0x00,0x00,0x00,0x00] + +s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS +// GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x02,0x85,0x60,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_RT +// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 ; encoding: [0x70,0x01,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_NT +// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x70,0x01,0x82,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT +// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x70,0x01,0x02,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_LU +// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x70,0x01,0x82,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_CU +// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 ; encoding: [0x70,0x01,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_SE +// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x70,0x01,0x22,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_DEV +// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x70,0x01,0x42,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_SYS +// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x70,0x01,0x62,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x70,0x01,0x22,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE +// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x70,0x01,0xa2,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS +// GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x70,0x01,0x62,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_RT +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_NT +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x82,0x22,0x82,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x82,0x22,0x02,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_LU +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x82,0x22,0x82,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_CU +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_SE +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x82,0x22,0x22,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_DEV +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x82,0x22,0x42,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_SYS +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x82,0x22,0x62,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x82,0x22,0x22,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x82,0x22,0xa2,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS +// GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x82,0x22,0x62,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_RT +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_NT +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding: [0x02,0xa5,0x82,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding: [0x02,0xa5,0x02,0xf5,0x00,0x00,0x00,0xd4] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding: [0x02,0xa5,0x82,0xf5,0x00,0x00,0x00,0xd4] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_CU +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SE +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding: [0x02,0xa5,0x22,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_DEV +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding: [0x02,0xa5,0x42,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SYS +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding: [0x02,0xa5,0x62,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x02,0xa5,0x22,0xf5,0x00,0x00,0x00,0xd4] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x02,0xa5,0xa2,0xf5,0x00,0x00,0x00,0xd4] + +s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS +// GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x02,0xa5,0x62,0xf5,0x00,0x00,0x00,0xd4] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_RT +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_NT +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding: [0x02,0x45,0x82,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding: [0x02,0x45,0x02,0xf5,0x00,0x00,0x00,0xd4] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding: [0x02,0x45,0x82,0xf5,0x00,0x00,0x00,0xd4] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_CU +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SE +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding: [0x02,0x45,0x22,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_DEV +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding: [0x02,0x45,0x42,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SYS +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding: [0x02,0x45,0x62,0xf4,0x00,0x00,0x00,0xd4] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x02,0x45,0x22,0xf5,0x00,0x00,0x00,0xd4] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x02,0x45,0xa2,0xf5,0x00,0x00,0x00,0xd4] + +s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS +// GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x02,0x45,0x62,0xf5,0x00,0x00,0x00,0xd4] + +s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_RT +// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x65,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_NT +// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x04,0x65,0x82,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT +// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x04,0x65,0x02,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_LU +// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x04,0x65,0x82,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_CU +// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x65,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_SE +// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x04,0x65,0x22,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_DEV +// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x04,0x65,0x42,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_SYS +// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x04,0x65,0x62,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x04,0x65,0x22,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE +// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x04,0x65,0xa2,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS +// GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x04,0x65,0x62,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_RT +// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x85,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_NT +// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x30,0x85,0x82,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT +// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x30,0x85,0x02,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_LU +// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x30,0x85,0x82,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_CU +// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x85,0x02,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_SE +// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x30,0x85,0x22,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_DEV +// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x30,0x85,0x42,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_SYS +// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x30,0x85,0x62,0xf4,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x30,0x85,0x22,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE +// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x30,0x85,0xa2,0xf5,0x00,0x00,0x00,0x00] + +s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS +// GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x30,0x85,0x62,0xf5,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_err.s b/llvm/test/MC/AMDGPU/gfx12_err.s index b23e60082ffef..b103d7cef9769 100644 --- a/llvm/test/MC/AMDGPU/gfx12_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_err.s @@ -36,6 +36,15 @@ image_store v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_STORE_RT_WB scope: image_store v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_STORE_BYPASS scope:SCOPE_DEV // GFX12-ERR: [[@LINE-1]]:{{[0-9]+}}: error: scope and th combination is not valid +s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_NT_RT +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for SMEM instruction + +s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_RT_NT +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for SMEM instruction + +s_load_b128 s[20:23], s[2:3], vcc_lo th:TH_LOAD_NT_HT +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for SMEM instruction + image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT scope:SCOPE_SE th:TH_LOAD_HT // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt index 7843905797a62..f46e74537b9c3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt @@ -1,5 +1,398 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s +# GFX12: s_load_b32 s101, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x19,0x00,0xf4,0x00,0x00,0x00,0x00] +0x41,0x19,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 s5, s[100:101], s0 offset:0x0 ; encoding: [0x72,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] +0x72,0x01,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8] +0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8 + +# GFX12: s_load_b32 s5, s[2:3], m0 offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa] +0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b32 s5, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] +0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 s5, s[2:3], s101 offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xca] +0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xca + +# GFX12: s_load_b32 s5, s[2:3], vcc_hi offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd6] +0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd6 + +# GFX12: s_load_b32 s5, s[2:3], vcc_lo offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd4] +0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 ; encoding: [0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] +0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 s5, vcc, s0 offset:0x0 ; encoding: [0x75,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] +0x75,0x01,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 vcc_hi, s[2:3], s0 offset:0x0 ; encoding: [0xc1,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00] +0xc1,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 vcc_lo, s[2:3], s0 offset:0x0 ; encoding: [0x81,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00] +0x81,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x85,0x00,0xf4,0x00,0x00,0x00,0x00] +0x32,0x85,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], s[2:3], 0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xf8] +0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xf8 + +# GFX12: s_load_b512 s[20:35], s[2:3], m0 offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xfa] +0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b512 s[20:35], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xca] +0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xca + +# GFX12: s_load_b512 s[20:35], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xd6] +0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xd6 + +# GFX12: s_load_b512 s[20:35], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xd4] +0x01,0x85,0x00,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x85,0x00,0xf4,0x00,0x00,0x00,0x00] +0x02,0x85,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], vcc, s0 offset:0x0 ; encoding: [0x35,0x85,0x00,0xf4,0x00,0x00,0x00,0x00] +0x35,0x85,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[24:39], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x86,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0x86,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[84:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x95,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0x95,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b64 s[100:101], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x39,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0x39,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b64 s[10:11], s[100:101], s0 offset:0x0 ; encoding: [0xb2,0x22,0x00,0xf4,0x00,0x00,0x00,0x00] +0xb2,0x22,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b64 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xf8] +0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xf8 + +# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xfa] +0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b64 s[10:11], s[2:3], s0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0x00] +0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b64 s[10:11], s[2:3], s101 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xca] +0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xca + +# GFX12: s_load_b64 s[10:11], s[2:3], vcc_hi offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd6] +0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd6 + +# GFX12: s_load_b64 s[10:11], s[2:3], vcc_lo offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd4] +0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b64 s[10:11], s[4:5], s0 offset:0x0 ; encoding: [0x82,0x22,0x00,0xf4,0x00,0x00,0x00,0x00] +0x82,0x22,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b64 s[10:11], vcc, s0 offset:0x0 ; encoding: [0xb5,0x22,0x00,0xf4,0x00,0x00,0x00,0x00] +0xb5,0x22,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b64 s[12:13], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x23,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0x23,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b64 vcc, s[2:3], s0 offset:0x0 ; encoding: [0x81,0x3a,0x00,0xf4,0x00,0x00,0x00,0x00] +0x81,0x3a,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b96 s[20:22], s[100:101], s0 offset:0x0 ; encoding: [0x32,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00] +0x32,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b96 s[20:22], s[2:3], 0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xf8] +0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xf8 + +# GFX12: s_load_b96 s[20:22], s[2:3], m0 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xfa] +0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b96 s[20:22], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b96 s[20:22], s[2:3], s101 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xca] +0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xca + +# GFX12: s_load_b96 s[20:22], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd6] +0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd6 + +# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd4] +0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b96 s[20:22], s[4:5], s0 offset:0x0 ; encoding: [0x02,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00] +0x02,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b96 s[20:22], vcc, s0 offset:0x0 ; encoding: [0x35,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00] +0x35,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b96 s[24:26], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xa6,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0xa6,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b96 s[96:98], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xb8,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0xb8,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b128 s[20:23], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x45,0x00,0xf4,0x00,0x00,0x00,0x00] +0x32,0x45,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b128 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xf8] +0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xf8 + +# GFX12: s_load_b128 s[20:23], s[2:3], m0 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xfa] +0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b128 s[20:23], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b128 s[20:23], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xca] +0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xca + +# GFX12: s_load_b128 s[20:23], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd6] +0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd6 + +# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd4] +0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b128 s[20:23], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x45,0x00,0xf4,0x00,0x00,0x00,0x00] +0x02,0x45,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b128 s[20:23], vcc, s0 offset:0x0 ; encoding: [0x35,0x45,0x00,0xf4,0x00,0x00,0x00,0x00] +0x35,0x45,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b128 s[24:27], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x46,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0x46,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b128 s[96:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x58,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0x58,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[20:27], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x65,0x00,0xf4,0x00,0x00,0x00,0x00] +0x32,0x65,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[20:27], s[2:3], 0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xf8] +0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xf8 + +# GFX12: s_load_b256 s[20:27], s[2:3], m0 offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xfa] +0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b256 s[20:27], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[20:27], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xca] +0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xca + +# GFX12: s_load_b256 s[20:27], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xd6] +0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xd6 + +# GFX12: s_load_b256 s[20:27], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xd4] +0x01,0x65,0x00,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b256 s[20:27], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x65,0x00,0xf4,0x00,0x00,0x00,0x00] +0x02,0x65,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[20:27], vcc, s0 offset:0x0 ; encoding: [0x35,0x65,0x00,0xf4,0x00,0x00,0x00,0x00] +0x35,0x65,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[24:31], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x66,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0x66,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x77,0x00,0xf4,0x00,0x00,0x00,0x00] +0x01,0x77,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s101, s[4:7], s0 offset:0x0 ; encoding: [0x42,0x19,0x02,0xf4,0x00,0x00,0x00,0x00] +0x42,0x19,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xf8] +0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xf8 + +# GFX12: s_buffer_load_b32 s5, s[4:7], m0 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xfa] +0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_buffer_load_b32 s5, s[4:7], s0 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0x00] +0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s5, s[4:7], s101 offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xca] +0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xca + +# GFX12: s_buffer_load_b32 s5, s[4:7], vcc_hi offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xd6] +0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xd6 + +# GFX12: s_buffer_load_b32 s5, s[4:7], vcc_lo offset:0x0 ; encoding: [0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xd4] +0x42,0x01,0x02,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b32 s5, s[8:11], s0 offset:0x0 ; encoding: [0x44,0x01,0x02,0xf4,0x00,0x00,0x00,0x00] +0x44,0x01,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 ; encoding: [0x70,0x01,0x02,0xf4,0x00,0x00,0x00,0x00] +0x70,0x01,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 vcc_hi, s[4:7], s0 offset:0x0 ; encoding: [0xc2,0x1a,0x02,0xf4,0x00,0x00,0x00,0x00] +0xc2,0x1a,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 vcc_lo, s[4:7], s0 offset:0x0 ; encoding: [0x82,0x1a,0x02,0xf4,0x00,0x00,0x00,0x00] +0x82,0x1a,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[20:35], s[4:7], 0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xf8] +0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xf8 + +# GFX12: s_buffer_load_b512 s[20:35], s[4:7], m0 offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xfa] +0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_buffer_load_b512 s[20:35], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[20:35], s[4:7], s101 offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xca] +0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xca + +# GFX12: s_buffer_load_b512 s[20:35], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xd6] +0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xd6 + +# GFX12: s_buffer_load_b512 s[20:35], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xd4] +0x02,0x85,0x02,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b512 s[20:35], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x85,0x02,0xf4,0x00,0x00,0x00,0x00] +0x04,0x85,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x85,0x02,0xf4,0x00,0x00,0x00,0x00] +0x30,0x85,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[24:39], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x86,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0x86,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[84:99], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x95,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0x95,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[100:101], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x39,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0x39,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xf8] +0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xf8 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], m0 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xfa] +0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0x00] +0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s101 offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xca] +0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xca + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], vcc_hi offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xd6] +0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xd6 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], vcc_lo offset:0x0 ; encoding: [0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xd4] +0x82,0x22,0x02,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b64 s[10:11], s[8:11], s0 offset:0x0 ; encoding: [0x84,0x22,0x02,0xf4,0x00,0x00,0x00,0x00] +0x84,0x22,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[10:11], s[96:99], s0 offset:0x0 ; encoding: [0xb0,0x22,0x02,0xf4,0x00,0x00,0x00,0x00] +0xb0,0x22,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[12:13], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x23,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0x23,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 vcc, s[4:7], s0 offset:0x0 ; encoding: [0x82,0x3a,0x02,0xf4,0x00,0x00,0x00,0x00] +0x82,0x3a,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], 0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xf8] +0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xf8 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], m0 offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xfa] +0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], s0 offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], s101 offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xca] +0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xca + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd6] +0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd6 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd4] +0x02,0xa5,0x02,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b96 s[20:22], s[8:11], s0 offset:0x0 ; encoding: [0x04,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00] +0x04,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b96 s[20:22], s[96:99], s0 offset:0x0 ; encoding: [0x30,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00] +0x30,0xa5,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b96 s[24:26], s[4:7], s0 offset:0x0 ; encoding: [0x02,0xa6,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0xa6,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b96 s[96:98], s[4:7], s0 offset:0x0 ; encoding: [0x02,0xb8,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0xb8,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xf8] +0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xf8 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], m0 offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xfa] +0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], s101 offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xca] +0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xca + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd6] +0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd6 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd4] +0x02,0x45,0x02,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b128 s[20:23], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x45,0x02,0xf4,0x00,0x00,0x00,0x00] +0x04,0x45,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b128 s[20:23], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x45,0x02,0xf4,0x00,0x00,0x00,0x00] +0x30,0x45,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b128 s[24:27], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x46,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0x46,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b128 s[96:99], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x58,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0x58,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[20:27], s[4:7], 0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xf8] +0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xf8 + +# GFX12: s_buffer_load_b256 s[20:27], s[4:7], m0 offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xfa] +0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_buffer_load_b256 s[20:27], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[20:27], s[4:7], s101 offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xca] +0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xca + +# GFX12: s_buffer_load_b256 s[20:27], s[4:7], vcc_hi offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xd6] +0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xd6 + +# GFX12: s_buffer_load_b256 s[20:27], s[4:7], vcc_lo offset:0x0 ; encoding: [0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xd4] +0x02,0x65,0x02,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 ; encoding: [0x04,0x65,0x02,0xf4,0x00,0x00,0x00,0x00] +0x04,0x65,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[20:27], s[96:99], s0 offset:0x0 ; encoding: [0x30,0x65,0x02,0xf4,0x00,0x00,0x00,0x00] +0x30,0x65,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[24:31], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x66,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0x66,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[92:99], s[4:7], s0 offset:0x0 ; encoding: [0x02,0x77,0x02,0xf4,0x00,0x00,0x00,0x00] +0x02,0x77,0x02,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_dcache_inv ; encoding: [0x00,0x20,0x04,0xf4,0x00,0x00,0x00,0x00] +0x00,0x20,0x04,0xf4,0x00,0x00,0x00,0x00 + # GFX12: s_prefetch_inst s[12:13], 0x10, s4, 2 ; encoding: [0x86,0x80,0x04,0xf4,0x10,0x00,0x00,0x08] 0x86,0x80,0x04,0xf4,0x10,0x00,0x00,0x08 @@ -29,3 +422,345 @@ # GFX12: s_buffer_prefetch_data s[20:23], 0x64, null, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8] 0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8 + +# GFX12: s_atc_probe 7, s[4:5], s2 offset:0x0 ; encoding: [0xc2,0x41,0x04,0xf4,0x00,0x00,0x00,0x04] +0xc2,0x41,0x04,0xf4,0x00,0x00,0x00,0x04 + +# GFX12: s_atc_probe 7, s[4:5], 0x64 ; encoding: [0xc2,0x41,0x04,0xf4,0x64,0x00,0x00,0xf8] +0xc2,0x41,0x04,0xf4,0x64,0x00,0x00,0xf8 + +# GFX12: s_atc_probe 7, s[4:5], s9 offset:0x64 ; encoding: [0xc2,0x41,0x04,0xf4,0x64,0x00,0x00,0x12] +0xc2,0x41,0x04,0xf4,0x64,0x00,0x00,0x12 + +# GFX12: s_atc_probe_buffer 7, s[8:11], s2 offset:0x0 ; encoding: [0xc4,0x61,0x04,0xf4,0x00,0x00,0x00,0x04] +0xc4,0x61,0x04,0xf4,0x00,0x00,0x00,0x04 + +# GFX12: s_atc_probe_buffer 7, s[8:11], 0x64 ; encoding: [0xc4,0x61,0x04,0xf4,0x64,0x00,0x00,0xf8] +0xc4,0x61,0x04,0xf4,0x64,0x00,0x00,0xf8 + +# GFX12: s_atc_probe_buffer 7, s[8:11], s9 offset:0x64 ; encoding: [0xc4,0x61,0x04,0xf4,0x64,0x00,0x00,0x12] +0xc4,0x61,0x04,0xf4,0x64,0x00,0x00,0x12 + +# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x42,0x01,0x80,0xf4,0x00,0x00,0x00,0x00] +0x42,0x01,0x80,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x42,0x01,0x00,0xf5,0x00,0x00,0x00,0x00] +0x42,0x01,0x00,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x42,0x01,0x80,0xf5,0x00,0x00,0x00,0x00] +0x42,0x01,0x80,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00] +0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x42,0x01,0x40,0xf4,0x00,0x00,0x00,0x00] +0x42,0x01,0x40,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x42,0x01,0x60,0xf4,0x00,0x00,0x00,0x00] +0x42,0x01,0x60,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x42,0x01,0x20,0xf5,0x00,0x00,0x00,0x00] +0x42,0x01,0x20,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x42,0x01,0xa0,0xf5,0x00,0x00,0x00,0x00] +0x42,0x01,0xa0,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x42,0x01,0x60,0xf5,0x00,0x00,0x00,0x00] +0x42,0x01,0x60,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x81,0x22,0x80,0xf4,0x00,0x00,0x00,0xfa] +0x81,0x22,0x80,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x81,0x22,0x00,0xf5,0x00,0x00,0x00,0xfa] +0x81,0x22,0x00,0xf5,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x81,0x22,0x80,0xf5,0x00,0x00,0x00,0xfa] +0x81,0x22,0x80,0xf5,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_SE ; encoding: [0x81,0x22,0x20,0xf4,0x00,0x00,0x00,0xfa] +0x81,0x22,0x20,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x81,0x22,0x40,0xf4,0x00,0x00,0x00,0xfa] +0x81,0x22,0x40,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x81,0x22,0x60,0xf4,0x00,0x00,0x00,0xfa] +0x81,0x22,0x60,0xf4,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x81,0x22,0x20,0xf5,0x00,0x00,0x00,0xfa] +0x81,0x22,0x20,0xf5,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x81,0x22,0xa0,0xf5,0x00,0x00,0x00,0xfa] +0x81,0x22,0xa0,0xf5,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x81,0x22,0x60,0xf5,0x00,0x00,0x00,0xfa] +0x81,0x22,0x60,0xf5,0x00,0x00,0x00,0xfa + +# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding: [0x01,0xa5,0x80,0xf4,0x00,0x00,0x00,0xd4] +0x01,0xa5,0x80,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding: [0x01,0xa5,0x00,0xf5,0x00,0x00,0x00,0xd4] +0x01,0xa5,0x00,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding: [0x01,0xa5,0x80,0xf5,0x00,0x00,0x00,0xd4] +0x01,0xa5,0x80,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding: [0x01,0xa5,0x20,0xf4,0x00,0x00,0x00,0xd4] +0x01,0xa5,0x20,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding: [0x01,0xa5,0x40,0xf4,0x00,0x00,0x00,0xd4] +0x01,0xa5,0x40,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding: [0x01,0xa5,0x60,0xf4,0x00,0x00,0x00,0xd4] +0x01,0xa5,0x60,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0xa5,0x20,0xf5,0x00,0x00,0x00,0xd4] +0x01,0xa5,0x20,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x01,0xa5,0xa0,0xf5,0x00,0x00,0x00,0xd4] +0x01,0xa5,0xa0,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x01,0xa5,0x60,0xf5,0x00,0x00,0x00,0xd4] +0x01,0xa5,0x60,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding: [0x01,0x45,0x80,0xf4,0x00,0x00,0x00,0xd4] +0x01,0x45,0x80,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding: [0x01,0x45,0x00,0xf5,0x00,0x00,0x00,0xd4] +0x01,0x45,0x00,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding: [0x01,0x45,0x80,0xf5,0x00,0x00,0x00,0xd4] +0x01,0x45,0x80,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding: [0x01,0x45,0x20,0xf4,0x00,0x00,0x00,0xd4] +0x01,0x45,0x20,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding: [0x01,0x45,0x40,0xf4,0x00,0x00,0x00,0xd4] +0x01,0x45,0x40,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding: [0x01,0x45,0x60,0xf4,0x00,0x00,0x00,0xd4] +0x01,0x45,0x60,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0x45,0x20,0xf5,0x00,0x00,0x00,0xd4] +0x01,0x45,0x20,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x01,0x45,0xa0,0xf5,0x00,0x00,0x00,0xd4] +0x01,0x45,0xa0,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b128 s[20:23], s[2:3], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x01,0x45,0x60,0xf5,0x00,0x00,0x00,0xd4] +0x01,0x45,0x60,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x01,0x77,0x80,0xf4,0x00,0x00,0x00,0x00] +0x01,0x77,0x80,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x01,0x77,0x00,0xf5,0x00,0x00,0x00,0x00] +0x01,0x77,0x00,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x01,0x77,0x80,0xf5,0x00,0x00,0x00,0x00] +0x01,0x77,0x80,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x01,0x77,0x20,0xf4,0x00,0x00,0x00,0x00] +0x01,0x77,0x20,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x01,0x77,0x40,0xf4,0x00,0x00,0x00,0x00] +0x01,0x77,0x40,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x01,0x77,0x60,0xf4,0x00,0x00,0x00,0x00] +0x01,0x77,0x60,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0x77,0x20,0xf5,0x00,0x00,0x00,0x00] +0x01,0x77,0x20,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x01,0x77,0xa0,0xf5,0x00,0x00,0x00,0x00] +0x01,0x77,0xa0,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b256 s[92:99], s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x01,0x77,0x60,0xf5,0x00,0x00,0x00,0x00] +0x01,0x77,0x60,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x02,0x85,0x80,0xf4,0x00,0x00,0x00,0x00] +0x02,0x85,0x80,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x02,0x85,0x00,0xf5,0x00,0x00,0x00,0x00] +0x02,0x85,0x00,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x02,0x85,0x80,0xf5,0x00,0x00,0x00,0x00] +0x02,0x85,0x80,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x02,0x85,0x20,0xf4,0x00,0x00,0x00,0x00] +0x02,0x85,0x20,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x02,0x85,0x40,0xf4,0x00,0x00,0x00,0x00] +0x02,0x85,0x40,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x02,0x85,0x60,0xf4,0x00,0x00,0x00,0x00] +0x02,0x85,0x60,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x02,0x85,0x20,0xf5,0x00,0x00,0x00,0x00] +0x02,0x85,0x20,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x02,0x85,0xa0,0xf5,0x00,0x00,0x00,0x00] +0x02,0x85,0xa0,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x02,0x85,0x60,0xf5,0x00,0x00,0x00,0x00] +0x02,0x85,0x60,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x70,0x01,0x82,0xf4,0x00,0x00,0x00,0x00] +0x70,0x01,0x82,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x70,0x01,0x02,0xf5,0x00,0x00,0x00,0x00] +0x70,0x01,0x02,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x70,0x01,0x82,0xf5,0x00,0x00,0x00,0x00] +0x70,0x01,0x82,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x70,0x01,0x22,0xf4,0x00,0x00,0x00,0x00] +0x70,0x01,0x22,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x70,0x01,0x42,0xf4,0x00,0x00,0x00,0x00] +0x70,0x01,0x42,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x70,0x01,0x62,0xf4,0x00,0x00,0x00,0x00] +0x70,0x01,0x62,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x70,0x01,0x22,0xf5,0x00,0x00,0x00,0x00] +0x70,0x01,0x22,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x70,0x01,0xa2,0xf5,0x00,0x00,0x00,0x00] +0x70,0x01,0xa2,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b32 s5, s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x70,0x01,0x62,0xf5,0x00,0x00,0x00,0x00] +0x70,0x01,0x62,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x82,0x22,0x82,0xf4,0x00,0x00,0x00,0x00] +0x82,0x22,0x82,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x82,0x22,0x02,0xf5,0x00,0x00,0x00,0x00] +0x82,0x22,0x02,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x82,0x22,0x82,0xf5,0x00,0x00,0x00,0x00] +0x82,0x22,0x82,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x82,0x22,0x22,0xf4,0x00,0x00,0x00,0x00] +0x82,0x22,0x22,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x82,0x22,0x42,0xf4,0x00,0x00,0x00,0x00] +0x82,0x22,0x42,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x82,0x22,0x62,0xf4,0x00,0x00,0x00,0x00] +0x82,0x22,0x62,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x82,0x22,0x22,0xf5,0x00,0x00,0x00,0x00] +0x82,0x22,0x22,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x82,0x22,0xa2,0xf5,0x00,0x00,0x00,0x00] +0x82,0x22,0xa2,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b64 s[10:11], s[4:7], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x82,0x22,0x62,0xf5,0x00,0x00,0x00,0x00] +0x82,0x22,0x62,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding: [0x02,0xa5,0x82,0xf4,0x00,0x00,0x00,0xd4] +0x02,0xa5,0x82,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding: [0x02,0xa5,0x02,0xf5,0x00,0x00,0x00,0xd4] +0x02,0xa5,0x02,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding: [0x02,0xa5,0x82,0xf5,0x00,0x00,0x00,0xd4] +0x02,0xa5,0x82,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding: [0x02,0xa5,0x22,0xf4,0x00,0x00,0x00,0xd4] +0x02,0xa5,0x22,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding: [0x02,0xa5,0x42,0xf4,0x00,0x00,0x00,0xd4] +0x02,0xa5,0x42,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding: [0x02,0xa5,0x62,0xf4,0x00,0x00,0x00,0xd4] +0x02,0xa5,0x62,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x02,0xa5,0x22,0xf5,0x00,0x00,0x00,0xd4] +0x02,0xa5,0x22,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x02,0xa5,0xa2,0xf5,0x00,0x00,0x00,0xd4] +0x02,0xa5,0xa2,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b96 s[20:22], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x02,0xa5,0x62,0xf5,0x00,0x00,0x00,0xd4] +0x02,0xa5,0x62,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_NT ; encoding: [0x02,0x45,0x82,0xf4,0x00,0x00,0x00,0xd4] +0x02,0x45,0x82,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT ; encoding: [0x02,0x45,0x02,0xf5,0x00,0x00,0x00,0xd4] +0x02,0x45,0x02,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU ; encoding: [0x02,0x45,0x82,0xf5,0x00,0x00,0x00,0xd4] +0x02,0x45,0x82,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SE ; encoding: [0x02,0x45,0x22,0xf4,0x00,0x00,0x00,0xd4] +0x02,0x45,0x22,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_DEV ; encoding: [0x02,0x45,0x42,0xf4,0x00,0x00,0x00,0xd4] +0x02,0x45,0x42,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 scope:SCOPE_SYS ; encoding: [0x02,0x45,0x62,0xf4,0x00,0x00,0x00,0xd4] +0x02,0x45,0x62,0xf4,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x02,0x45,0x22,0xf5,0x00,0x00,0x00,0xd4] +0x02,0x45,0x22,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x02,0x45,0xa2,0xf5,0x00,0x00,0x00,0xd4] +0x02,0x45,0xa2,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x02,0x45,0x62,0xf5,0x00,0x00,0x00,0xd4] +0x02,0x45,0x62,0xf5,0x00,0x00,0x00,0xd4 + +# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x04,0x65,0x82,0xf4,0x00,0x00,0x00,0x00] +0x04,0x65,0x82,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x04,0x65,0x02,0xf5,0x00,0x00,0x00,0x00] +0x04,0x65,0x02,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x04,0x65,0x82,0xf5,0x00,0x00,0x00,0x00] +0x04,0x65,0x82,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x04,0x65,0x22,0xf4,0x00,0x00,0x00,0x00] +0x04,0x65,0x22,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x04,0x65,0x42,0xf4,0x00,0x00,0x00,0x00] +0x04,0x65,0x42,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x04,0x65,0x62,0xf4,0x00,0x00,0x00,0x00] +0x04,0x65,0x62,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x04,0x65,0x22,0xf5,0x00,0x00,0x00,0x00] +0x04,0x65,0x22,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x04,0x65,0xa2,0xf5,0x00,0x00,0x00,0x00] +0x04,0x65,0xa2,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b256 s[20:27], s[8:11], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x04,0x65,0x62,0xf5,0x00,0x00,0x00,0x00] +0x04,0x65,0x62,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x30,0x85,0x82,0xf4,0x00,0x00,0x00,0x00] +0x30,0x85,0x82,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x30,0x85,0x02,0xf5,0x00,0x00,0x00,0x00] +0x30,0x85,0x02,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x30,0x85,0x82,0xf5,0x00,0x00,0x00,0x00] +0x30,0x85,0x82,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x30,0x85,0x22,0xf4,0x00,0x00,0x00,0x00] +0x30,0x85,0x22,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x30,0x85,0x42,0xf4,0x00,0x00,0x00,0x00] +0x30,0x85,0x42,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x30,0x85,0x62,0xf4,0x00,0x00,0x00,0x00] +0x30,0x85,0x62,0xf4,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x30,0x85,0x22,0xf5,0x00,0x00,0x00,0x00] +0x30,0x85,0x22,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x30,0x85,0xa2,0xf5,0x00,0x00,0x00,0x00] +0x30,0x85,0xa2,0xf5,0x00,0x00,0x00,0x00 + +# GFX12: s_buffer_load_b512 s[20:35], s[96:99], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x30,0x85,0x62,0xf5,0x00,0x00,0x00,0x00] +0x30,0x85,0x62,0xf5,0x00,0x00,0x00,0x00