diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 486c1defc332a..d2f18fefd9866 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -161,13 +161,11 @@ static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                            const MCDisassembler *Decoder) {                    \
     assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                   \
     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);               \
-    return addOperand(Inst,                                                     \
-                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm));  \
+    return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));                \
   }
 
 static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
-                                AMDGPUDisassembler::OpWidthTy OpWidth,
-                                unsigned Imm, unsigned EncImm,
+                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                 const MCDisassembler *Decoder) {
   assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
@@ -186,7 +184,7 @@ static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
 // Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
 // Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
 // Used by AV_ register classes (AGPR or VGPR only register operands).
-template <AMDGPUDisassembler::OpWidthTy OpWidth>
+template <unsigned OpWidth>
 static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
   return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
@@ -194,7 +192,7 @@ static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
 }
 
 // Decoder for Src(9-bit encoding) registers only.
-template <AMDGPUDisassembler::OpWidthTy OpWidth>
+template <unsigned OpWidth>
 static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                   uint64_t /* Addr */,
                                   const MCDisassembler *Decoder) {
@@ -204,7 +202,7 @@ static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
 // Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
 // Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
 // only.
-template <AMDGPUDisassembler::OpWidthTy OpWidth>
+template <unsigned OpWidth>
 static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
   return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
@@ -212,7 +210,7 @@ static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
 
 // Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
 // Imm{9} is acc, registers only.
-template <AMDGPUDisassembler::OpWidthTy OpWidth>
+template <unsigned OpWidth>
 static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                   uint64_t /* Addr */,
                                   const MCDisassembler *Decoder) {
@@ -224,7 +222,7 @@ static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
 // will be decoded and InstPrinter will report warning. Immediate will be
 // decoded into constant matching the OperandType (important for floating point
 // types).
-template <AMDGPUDisassembler::OpWidthTy OpWidth>
+template <unsigned OpWidth>
 static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
@@ -233,14 +231,14 @@ static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
 
 // Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
 // and decode using 'enum10' from decodeSrcOp.
-template <AMDGPUDisassembler::OpWidthTy OpWidth>
+template <unsigned OpWidth>
 static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                         uint64_t /* Addr */,
                                         const MCDisassembler *Decoder) {
   return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
 }
 
-template <AMDGPUDisassembler::OpWidthTy OpWidth>
+template <unsigned OpWidth>
 static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
                                                uint64_t /* Addr */,
                                                const MCDisassembler *Decoder) {
@@ -265,21 +263,21 @@ DECODE_OPERAND_REG_8(VReg_384)
 DECODE_OPERAND_REG_8(VReg_512)
 DECODE_OPERAND_REG_8(VReg_1024)
 
-DECODE_OPERAND_SREG_7(SReg_32, OPW32)
-DECODE_OPERAND_SREG_7(SReg_32_XM0, OPW32)
-DECODE_OPERAND_SREG_7(SReg_32_XEXEC, OPW32)
-DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, OPW32)
-DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, OPW32)
-DECODE_OPERAND_SREG_7(SReg_64_XEXEC, OPW64)
-DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, OPW64)
-DECODE_OPERAND_SREG_7(SReg_96, OPW96)
-DECODE_OPERAND_SREG_7(SReg_128, OPW128)
-DECODE_OPERAND_SREG_7(SReg_128_XNULL, OPW128)
-DECODE_OPERAND_SREG_7(SReg_256, OPW256)
-DECODE_OPERAND_SREG_7(SReg_256_XNULL, OPW256)
-DECODE_OPERAND_SREG_7(SReg_512, OPW512)
-
-DECODE_OPERAND_SREG_8(SReg_64, OPW64)
+DECODE_OPERAND_SREG_7(SReg_32, 32)
+DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
+DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
+DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
+DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
+DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
+DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
+DECODE_OPERAND_SREG_7(SReg_96, 96)
+DECODE_OPERAND_SREG_7(SReg_128, 128)
+DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
+DECODE_OPERAND_SREG_7(SReg_256, 256)
+DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
+DECODE_OPERAND_SREG_7(SReg_512, 512)
+
+DECODE_OPERAND_SREG_8(SReg_64, 64)
 
 DECODE_OPERAND_REG_8(AGPR_32)
 DECODE_OPERAND_REG_8(AReg_64)
@@ -311,7 +309,7 @@ DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
   return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
 }
 
-template <AMDGPUDisassembler::OpWidthTy OpWidth>
+template <unsigned OpWidth>
 static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                 uint64_t /*Addr*/,
                                                 const MCDisassembler *Decoder) {
@@ -326,7 +324,7 @@ static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
   return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
 }
 
-template <AMDGPUDisassembler::OpWidthTy OpWidth>
+template <unsigned OpWidth>
 static DecodeStatus
 decodeOperand_VSrcT16_Lo128_Deferred(MCInst &Inst, unsigned Imm,
                                      uint64_t /*Addr*/,
@@ -342,7 +340,7 @@ decodeOperand_VSrcT16_Lo128_Deferred(MCInst &Inst, unsigned Imm,
   return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
 }
 
-template <AMDGPUDisassembler::OpWidthTy OpWidth>
+template <unsigned OpWidth>
 static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                           uint64_t /*Addr*/,
                                           const MCDisassembler *Decoder) {
@@ -397,8 +395,7 @@ static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
   return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
 }
 
-static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
-                                 AMDGPUDisassembler::OpWidthTy Opw,
+static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
                                  const MCDisassembler *Decoder) {
   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
   if (!DAsm->isGFX90A()) {
@@ -432,7 +429,7 @@ static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
   return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
 }
 
-template <AMDGPUDisassembler::OpWidthTy Opw>
+template <unsigned Opw>
 static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
@@ -444,7 +441,7 @@ static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                            const MCDisassembler *Decoder) {
   assert(Imm < (1 << 9) && "9-bit encoding");
   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
-  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm));
+  return addOperand(Inst, DAsm->decodeSrcOp(64, Imm));
 }
 
 #define DECODE_SDWA(DecName) \
@@ -1629,102 +1626,130 @@ static int64_t getInlineImmValBF16(unsigned Imm) {
   }
 }
 
-unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
+unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
   using namespace AMDGPU;
 
-  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
   switch (Width) {
-  default: // fall
-  case OPW32:
-  case OPW16:
-  case OPWV216:
+  case 16:
+  case 32:
     return VGPR_32RegClassID;
-  case OPW64:
-  case OPWV232: return VReg_64RegClassID;
-  case OPW96: return VReg_96RegClassID;
-  case OPW128: return VReg_128RegClassID;
-  case OPW192: return VReg_192RegClassID;
-  case OPW160: return VReg_160RegClassID;
-  case OPW256: return VReg_256RegClassID;
-  case OPW288: return VReg_288RegClassID;
-  case OPW320: return VReg_320RegClassID;
-  case OPW352: return VReg_352RegClassID;
-  case OPW384: return VReg_384RegClassID;
-  case OPW512: return VReg_512RegClassID;
-  case OPW1024: return VReg_1024RegClassID;
-  }
-}
-
-unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
+  case 64:
+    return VReg_64RegClassID;
+  case 96:
+    return VReg_96RegClassID;
+  case 128:
+    return VReg_128RegClassID;
+  case 160:
+    return VReg_160RegClassID;
+  case 192:
+    return VReg_192RegClassID;
+  case 256:
+    return VReg_256RegClassID;
+  case 288:
+    return VReg_288RegClassID;
+  case 320:
+    return VReg_320RegClassID;
+  case 352:
+    return VReg_352RegClassID;
+  case 384:
+    return VReg_384RegClassID;
+  case 512:
+    return VReg_512RegClassID;
+  case 1024:
+    return VReg_1024RegClassID;
+  }
+  llvm_unreachable("Invalid register width!");
+}
+
+unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
   using namespace AMDGPU;
 
-  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
   switch (Width) {
-  default: // fall
-  case OPW32:
-  case OPW16:
-  case OPWV216:
+  case 16:
+  case 32:
     return AGPR_32RegClassID;
-  case OPW64:
-  case OPWV232: return AReg_64RegClassID;
-  case OPW96: return AReg_96RegClassID;
-  case OPW128: return AReg_128RegClassID;
-  case OPW160: return AReg_160RegClassID;
-  case OPW256: return AReg_256RegClassID;
-  case OPW288: return AReg_288RegClassID;
-  case OPW320: return AReg_320RegClassID;
-  case OPW352: return AReg_352RegClassID;
-  case OPW384: return AReg_384RegClassID;
-  case OPW512: return AReg_512RegClassID;
-  case OPW1024: return AReg_1024RegClassID;
-  }
-}
-
-
-unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
+  case 64:
+    return AReg_64RegClassID;
+  case 96:
+    return AReg_96RegClassID;
+  case 128:
+    return AReg_128RegClassID;
+  case 160:
+    return AReg_160RegClassID;
+  case 256:
+    return AReg_256RegClassID;
+  case 288:
+    return AReg_288RegClassID;
+  case 320:
+    return AReg_320RegClassID;
+  case 352:
+    return AReg_352RegClassID;
+  case 384:
+    return AReg_384RegClassID;
+  case 512:
+    return AReg_512RegClassID;
+  case 1024:
+    return AReg_1024RegClassID;
+  }
+  llvm_unreachable("Invalid register width!");
+}
+
+unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
   using namespace AMDGPU;
 
-  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
   switch (Width) {
-  default: // fall
-  case OPW32:
-  case OPW16:
-  case OPWV216:
+  case 16:
+  case 32:
     return SGPR_32RegClassID;
-  case OPW64:
-  case OPWV232: return SGPR_64RegClassID;
-  case OPW96: return SGPR_96RegClassID;
-  case OPW128: return SGPR_128RegClassID;
-  case OPW160: return SGPR_160RegClassID;
-  case OPW256: return SGPR_256RegClassID;
-  case OPW288: return SGPR_288RegClassID;
-  case OPW320: return SGPR_320RegClassID;
-  case OPW352: return SGPR_352RegClassID;
-  case OPW384: return SGPR_384RegClassID;
-  case OPW512: return SGPR_512RegClassID;
-  }
-}
-
-unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
+  case 64:
+    return SGPR_64RegClassID;
+  case 96:
+    return SGPR_96RegClassID;
+  case 128:
+    return SGPR_128RegClassID;
+  case 160:
+    return SGPR_160RegClassID;
+  case 256:
+    return SGPR_256RegClassID;
+  case 288:
+    return SGPR_288RegClassID;
+  case 320:
+    return SGPR_320RegClassID;
+  case 352:
+    return SGPR_352RegClassID;
+  case 384:
+    return SGPR_384RegClassID;
+  case 512:
+    return SGPR_512RegClassID;
+  }
+  llvm_unreachable("Invalid register width!");
+}
+
+unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
   using namespace AMDGPU;
 
-  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
   switch (Width) {
-  default: // fall
-  case OPW32:
-  case OPW16:
-  case OPWV216:
+  case 16:
+  case 32:
     return TTMP_32RegClassID;
-  case OPW64:
-  case OPWV232: return TTMP_64RegClassID;
-  case OPW128: return TTMP_128RegClassID;
-  case OPW256: return TTMP_256RegClassID;
-  case OPW288: return TTMP_288RegClassID;
-  case OPW320: return TTMP_320RegClassID;
-  case OPW352: return TTMP_352RegClassID;
-  case OPW384: return TTMP_384RegClassID;
-  case OPW512: return TTMP_512RegClassID;
-  }
+  case 64:
+    return TTMP_64RegClassID;
+  case 128:
+    return TTMP_128RegClassID;
+  case 256:
+    return TTMP_256RegClassID;
+  case 288:
+    return TTMP_288RegClassID;
+  case 320:
+    return TTMP_320RegClassID;
+  case 352:
+    return TTMP_352RegClassID;
+  case 384:
+    return TTMP_384RegClassID;
+  case 512:
+    return TTMP_512RegClassID;
+  }
+  llvm_unreachable("Invalid register width!");
 }
 
 int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
@@ -1736,8 +1761,7 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
   return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
 }
 
-MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width,
-                                          unsigned Val) const {
+MCOperand AMDGPUDisassembler::decodeSrcOp(unsigned Width, unsigned Val) const {
   using namespace AMDGPU::EncValues;
 
   assert(Val < 1024); // enum10
@@ -1752,7 +1776,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width,
   return decodeNonVGPRSrcOp(Width, Val & 0xFF);
 }
 
-MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
+MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(unsigned Width,
                                                  unsigned Val) const {
   // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
   // decoded earlier.
@@ -1776,17 +1800,15 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
     return MCOperand::createImm(Val);
 
   switch (Width) {
-  case OPW32:
-  case OPW16:
-  case OPWV216:
+  case 32:
+  case 16:
     return decodeSpecialReg32(Val);
-  case OPW64:
-  case OPWV232:
+  case 64:
     return decodeSpecialReg64(Val);
-  case OPW96:
-  case OPW128:
-  case OPW256:
-  case OPW512:
+  case 96:
+  case 128:
+  case 256:
+  case 512:
     return decodeSpecialReg96Plus(Val);
   default:
     llvm_unreachable("unexpected immediate type");
@@ -1803,8 +1825,7 @@ MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
   assert(Inst.getOperand(VDstXInd).isReg());
   unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
   Val |= ~XDstReg & 1;
-  auto Width = llvm::AMDGPUDisassembler::OPW32;
-  return createRegOperand(getVgprClassId(Width), Val);
+  return createRegOperand(getVgprClassId(32), Val);
 }
 
 MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
@@ -1892,7 +1913,7 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
   return errOperand(Val, "unknown operand encoding " + Twine(Val));
 }
 
-MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
+MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
                                             const unsigned Val) const {
   using namespace AMDGPU::SDWA;
   using namespace AMDGPU::EncValues;
@@ -1932,11 +1953,11 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
 }
 
 MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
-  return decodeSDWASrc(OPW16, Val);
+  return decodeSDWASrc(16, Val);
 }
 
 MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
-  return decodeSDWASrc(OPW32, Val);
+  return decodeSDWASrc(32, Val);
 }
 
 MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
@@ -1953,25 +1974,24 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
 
     int TTmpIdx = getTTmpIdx(Val);
     if (TTmpIdx >= 0) {
-      auto TTmpClsId = getTtmpClassId(IsWave32 ? OPW32 : OPW64);
+      auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
       return createSRegOperand(TTmpClsId, TTmpIdx);
     }
     if (Val > SGPR_MAX) {
       return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
     }
-    return createSRegOperand(getSgprClassId(IsWave32 ? OPW32 : OPW64), Val);
+    return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
   }
   return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
 }
 
 MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
-  return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
-             ? decodeSrcOp(OPW32, Val)
-             : decodeSrcOp(OPW64, Val);
+  return STI.hasFeature(AMDGPU::FeatureWavefrontSize32) ? decodeSrcOp(32, Val)
+                                                        : decodeSrcOp(64, Val);
 }
 
 MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
-  return decodeSrcOp(OPW32, Val);
+  return decodeSrcOp(32, Val);
 }
 
 MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 4603e8587a3a0..3ca7c3e1fd682 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -186,47 +186,26 @@ class AMDGPUDisassembler : public MCDisassembler {
   void convertMacDPPInst(MCInst &MI) const;
   void convertTrue16OpSel(MCInst &MI) const;
 
-  enum OpWidthTy {
-    OPW32,
-    OPW64,
-    OPW96,
-    OPW128,
-    OPW160,
-    OPW192,
-    OPW256,
-    OPW288,
-    OPW320,
-    OPW352,
-    OPW384,
-    OPW512,
-    OPW1024,
-    OPW16,
-    OPWV216,
-    OPWV232,
-    OPW_LAST_,
-    OPW_FIRST_ = OPW32
-  };
-
-  unsigned getVgprClassId(const OpWidthTy Width) const;
-  unsigned getAgprClassId(const OpWidthTy Width) const;
-  unsigned getSgprClassId(const OpWidthTy Width) const;
-  unsigned getTtmpClassId(const OpWidthTy Width) const;
+  unsigned getVgprClassId(unsigned Width) const;
+  unsigned getAgprClassId(unsigned Width) const;
+  unsigned getSgprClassId(unsigned Width) const;
+  unsigned getTtmpClassId(unsigned Width) const;
 
   static MCOperand decodeIntImmed(unsigned Imm);
 
   MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
   MCOperand decodeLiteralConstant(bool ExtendFP64) const;
 
-  MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
+  MCOperand decodeSrcOp(unsigned Width, unsigned Val) const;
 
-  MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val) const;
+  MCOperand decodeNonVGPRSrcOp(unsigned Width, unsigned Val) const;
 
   MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
   MCOperand decodeSpecialReg32(unsigned Val) const;
   MCOperand decodeSpecialReg64(unsigned Val) const;
   MCOperand decodeSpecialReg96Plus(unsigned Val) const;
 
-  MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val) const;
+  MCOperand decodeSDWASrc(unsigned Width, unsigned Val) const;
   MCOperand decodeSDWASrc16(unsigned Val) const;
   MCOperand decodeSDWASrc32(unsigned Val) const;
   MCOperand decodeSDWAVopcDst(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 5a576e91f7c9c..dc08b7d5a8e69 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -770,6 +770,7 @@ def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16,
   let AllocationPriority = 0;
   let HasSGPR = 1;
   let BaseClassOrder = 32;
+  let Size = 32;
 }
 
 def SGPR_NULL128 : SIReg<"null">;
@@ -780,6 +781,7 @@ def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v
                             (add SReg_32, LDS_DIRECT_CLASS)> {
   let isAllocatable = 0;
   let HasSGPR = 1;
+  let Size = 32;
 }
 
 def SGPR_64 : SIRegisterClass<"AMDGPU", Reg64Types.types, 32,
@@ -831,6 +833,7 @@ def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f1
   let AllocationPriority = 1;
   let HasSGPR = 1;
   let BaseClassOrder = 64;
+  let Size = 64;
 }
 
 def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
@@ -909,6 +912,7 @@ def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v
                                  (add VGPR_32, LDS_DIRECT_CLASS)> {
   let isAllocatable = 0;
   let HasVGPR = 1;
+  let Size = 32;
 }
 
 // Register class for all vector registers (VGPRs + Interpolation Registers)
@@ -1009,12 +1013,14 @@ def VS_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
                            (add VGPR_16, SReg_32, LDS_DIRECT_CLASS)> {
   let isAllocatable = 0;
   let HasVGPR = 1;
+  let Size = 16;
 }
 
 def VS_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
                                   (add VGPR_16_Lo128, SReg_32, LDS_DIRECT_CLASS)> {
   let isAllocatable = 0;
   let HasVGPR = 1;
+  let Size = 16;
 }
 
 def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
@@ -1022,6 +1028,7 @@ def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v
   let isAllocatable = 0;
   let HasVGPR = 1;
   let HasSGPR = 1;
+  let Size = 32;
 }
 
 def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
@@ -1029,17 +1036,20 @@ def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2
   let isAllocatable = 0;
   let HasVGPR = 1;
   let HasSGPR = 1;
+  let Size = 32;
 }
 
 def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64, SReg_64)> {
   let isAllocatable = 0;
   let HasVGPR = 1;
   let HasSGPR = 1;
+  let Size = 64;
 }
 
 def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
   let HasVGPR = 1;
  let HasAGPR = 1;
+  let Size = 32;
 }
 
 } // End GeneratePressureSet = 0
@@ -1097,57 +1107,55 @@ class RegOrImmOperand
 //  SSrc_* Operands with an SGPR or a 32-bit immediate
 //===----------------------------------------------------------------------===//
 
-class SrcRegOrImm9 <RegisterClass regClass, string opWidth, string operandType>
+class SrcRegOrImm9 <RegisterClass regClass, string operandType>
   : RegOrImmOperand <regClass, operandType> {
   string DecoderMethodName = "decodeSrcRegOrImm9";
-  let DecoderMethod = DecoderMethodName # "<AMDGPUDisassembler::" # opWidth # ">";
+  let DecoderMethod = DecoderMethodName # "<" # regClass.Size # ">";
 }
 
 class SrcRegOrImm9_t16
-  : SrcRegOrImm9 {
+  : SrcRegOrImm9 {
   let DecoderMethodName = "decodeOperand_VSrcT16";
   let EncoderMethod = "getMachineOpValueT16";
 }
 
-def SSrc_b16 : SrcRegOrImm9 ;
-def SSrc_bf16: SrcRegOrImm9 ;
-def SSrc_f16 : SrcRegOrImm9 ;
-def SSrc_b32 : SrcRegOrImm9 ;
-def SSrc_f32 : SrcRegOrImm9 ;
-def SSrc_b64 : SrcRegOrImm9 ;
+def SSrc_b16 : SrcRegOrImm9 ;
+def SSrc_bf16: SrcRegOrImm9 ;
+def SSrc_f16 : SrcRegOrImm9 ;
+def SSrc_b32 : SrcRegOrImm9 ;
+def SSrc_f32 : SrcRegOrImm9 ;
+def SSrc_b64 : SrcRegOrImm9 ;
 
-def SSrcOrLds_b32 : SrcRegOrImm9 ;
+def SSrcOrLds_b32 : SrcRegOrImm9 ;
 
 //===----------------------------------------------------------------------===//
 //  SSrc_32_Deferred Operands with an SGPR or a 32-bit immediate for use with
 //  FMAMK/FMAAK
 //===----------------------------------------------------------------------===//
 
-class SrcRegOrImmDeferred9 <RegisterClass regClass, string opWidth, string operandType>
+class SrcRegOrImmDeferred9 <RegisterClass regClass, string operandType>
   : RegOrImmOperand <regClass, operandType> {
   string DecoderMethodName = "decodeSrcRegOrImmDeferred9";
-  let DecoderMethod = DecoderMethodName # "<AMDGPUDisassembler::" # opWidth # ">";
+  let DecoderMethod = DecoderMethodName # "<" # regClass.Size # ">";
 }
 
-def SSrc_f32_Deferred : SrcRegOrImmDeferred9;
+def SSrc_f32_Deferred : SrcRegOrImmDeferred9;
 
//===----------------------------------------------------------------------===//
 //  SCSrc_* Operands with an SGPR or a inline constant
 //===----------------------------------------------------------------------===//
 
-def SCSrc_b32 : SrcRegOrImm9 ;
-def SCSrc_b64 : SrcRegOrImm9 ;
+def SCSrc_b32 : SrcRegOrImm9 ;
+def SCSrc_b64 : SrcRegOrImm9 ;
 
 //===----------------------------------------------------------------------===//
 //  VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
 //===----------------------------------------------------------------------===//
 
 // The current and temporary future default used case for VOP3.
-def VSrc_b16 : SrcRegOrImm9 ;
-def VSrc_bf16 : SrcRegOrImm9 ;
-def VSrc_f16 : SrcRegOrImm9 ;
+def VSrc_b16 : SrcRegOrImm9 ;
+def VSrc_bf16 : SrcRegOrImm9 ;
+def VSrc_f16 : SrcRegOrImm9 ;
 
 // True16 VOP3 operands.
 def VSrcT_b16 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_INT16">;
@@ -1163,42 +1171,42 @@ let DecoderMethodName = "decodeOperand_VSrcT16_Lo128", EncoderMethod = "getMachi
 
 // The current and temporary future default used case for fake VOP1/2/C.
 // For VOP1,2,C True16 instructions. _Lo128 use first 128 32-bit VGPRs only.
-def VSrcFake16_b16_Lo128 : SrcRegOrImm9 ;
-def VSrcFake16_bf16_Lo128 : SrcRegOrImm9 ;
-def VSrcFake16_f16_Lo128 : SrcRegOrImm9 ;
-
-def VSrc_b32 : SrcRegOrImm9 ;
-def VSrc_f32 : SrcRegOrImm9 ;
-def VSrc_v2b16 : SrcRegOrImm9 ;
-def VSrc_v2bf16 : SrcRegOrImm9 ;
-def VSrc_v2f16 : SrcRegOrImm9 ;
-def VSrc_b64 : SrcRegOrImm9 ;
-def VSrc_f64 : SrcRegOrImm9 {
+def VSrcFake16_b16_Lo128 : SrcRegOrImm9 ;
+def VSrcFake16_bf16_Lo128 : SrcRegOrImm9 ;
+def VSrcFake16_f16_Lo128 : SrcRegOrImm9 ;
+
+def VSrc_b32 : SrcRegOrImm9 ;
+def VSrc_f32 : SrcRegOrImm9 ;
+def VSrc_v2b16 : SrcRegOrImm9 ;
+def VSrc_v2bf16 : SrcRegOrImm9 ;
+def VSrc_v2f16 : SrcRegOrImm9 ;
+def VSrc_b64 : SrcRegOrImm9 ;
+def VSrc_f64 : SrcRegOrImm9 {
   let DecoderMethod = "decodeOperand_VSrc_f64";
 }
-def VSrc_v2b32 : SrcRegOrImm9 ;
-def VSrc_v2f32 : SrcRegOrImm9 ;
+def VSrc_v2b32 : SrcRegOrImm9 ;
+def VSrc_v2f32 : SrcRegOrImm9 ;
 
 //===----------------------------------------------------------------------===//
 //  VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use
 //  with FMAMK/FMAAK
 //===----------------------------------------------------------------------===//
 
-def VSrc_bf16_Deferred : SrcRegOrImmDeferred9;
-def VSrc_f16_Deferred : SrcRegOrImmDeferred9;
-def VSrc_f32_Deferred : SrcRegOrImmDeferred9;
+def VSrc_bf16_Deferred : SrcRegOrImmDeferred9;
+def VSrc_f16_Deferred : SrcRegOrImmDeferred9;
+def VSrc_f32_Deferred : SrcRegOrImmDeferred9;
 
 // True 16 Operands
-def VSrcT_f16_Lo128_Deferred : SrcRegOrImmDeferred9 {
   let DecoderMethodName = "decodeOperand_VSrcT16_Lo128_Deferred";
   let EncoderMethod = "getMachineOpValueT16Lo128";
 }
 
 def VSrcFake16_bf16_Lo128_Deferred
-  : SrcRegOrImmDeferred9;
+  : SrcRegOrImmDeferred9;
 def VSrcFake16_f16_Lo128_Deferred
-  : SrcRegOrImmDeferred9;
+  : SrcRegOrImmDeferred9;
 
 //===----------------------------------------------------------------------===//
 // VRegSrc_* Operands with a VGPR
@@ -1206,26 +1214,26 @@ def VSrcFake16_f16_Lo128_Deferred
 
 // This is for operands with the enum(9), VSrc encoding restriction,
 // but only allows VGPRs.
-class SrcReg9 <RegisterClass regClass, string opWidth> : RegisterOperand <regClass> {
-  let DecoderMethod = "decodeSrcReg9<AMDGPUDisassembler::" # opWidth # ">";
+class SrcReg9 <RegisterClass regClass> : RegisterOperand <regClass> {
+  let DecoderMethod = "decodeSrcReg9<" # regClass.Size # ">";
 }
 
-def VRegSrc_32 : SrcReg9;
-def VRegSrc_64 : SrcReg9;
-def VRegSrc_96 : SrcReg9;
-def VRegSrc_128: SrcReg9;
-def VRegSrc_192: SrcReg9;
-def VRegSrc_256: SrcReg9;
-def VRegSrc_512: SrcReg9;
-def VRegSrc_1024: SrcReg9;
-def VRegOrLdsSrc_32 : SrcReg9;
+def VRegSrc_32 : SrcReg9;
+def VRegSrc_64 : SrcReg9;
+def VRegSrc_96 : SrcReg9;
+def VRegSrc_128: SrcReg9;
+def VRegSrc_192: SrcReg9;
+def VRegSrc_256: SrcReg9;
+def VRegSrc_512: SrcReg9;
+def VRegSrc_1024: SrcReg9;
+def VRegOrLdsSrc_32 : SrcReg9;
 
 // True 16 Operands
 def VRegSrc_16 : RegisterOperand {
   let DecoderMethod = "decodeOperand_VGPR_16";
   let EncoderMethod = "getMachineOpValueT16";
 }
-def VRegSrc_fake16: SrcReg9 {
+def VRegSrc_fake16: SrcReg9 {
   let EncoderMethod = "getMachineOpValueT16";
 }
 //===----------------------------------------------------------------------===//
@@ -1259,26 +1267,26 @@ def VGPRSrc_16 : RegisterOperand {
 // ASrc_* Operands with an AccVGPR
 //===----------------------------------------------------------------------===//
 
-class AVOperand <RegisterClass regClass, string decoder, string opWidth>
+class AVOperand <RegisterClass regClass, string decoder>
   : RegisterOperand <regClass> {
-  let DecoderMethod = decoder # "<AMDGPUDisassembler::" # opWidth # ">";
+  let DecoderMethod = decoder # "<" # regClass.Size # ">";
   let EncoderMethod = "getAVOperandEncoding";
 }
 
-def ARegSrc_32 : AVOperand;
+def ARegSrc_32 : AVOperand;
 
 //===----------------------------------------------------------------------===//
 //  VCSrc_* Operands with an SGPR, VGPR or an inline constant
 //===----------------------------------------------------------------------===//
 
-def VCSrc_b16 : SrcRegOrImm9 ;
-def VCSrc_bf16 : SrcRegOrImm9 ;
-def VCSrc_f16 : SrcRegOrImm9 ;
-def VCSrc_b32 : SrcRegOrImm9 ;
-def VCSrc_f32 : SrcRegOrImm9 ;
-def VCSrc_v2b16 : SrcRegOrImm9 ;
-def VCSrc_v2bf16: SrcRegOrImm9 ;
-def VCSrc_v2f16 : SrcRegOrImm9 ;
+def VCSrc_b16 : SrcRegOrImm9 ;
+def VCSrc_bf16 : SrcRegOrImm9 ;
+def VCSrc_f16 : SrcRegOrImm9 ;
+def VCSrc_b32 : SrcRegOrImm9 ;
+def VCSrc_f32 : SrcRegOrImm9 ;
+def VCSrc_v2b16 : SrcRegOrImm9 ;
+def VCSrc_v2bf16: SrcRegOrImm9 ;
+def VCSrc_v2f16 : SrcRegOrImm9 ;
 
 // True 16 Operands
 def VCSrcT_b16 : SrcRegOrImm9_t16 <"OPERAND_REG_INLINE_C_INT16">;
@@ -1288,66 +1296,66 @@ def VCSrcT_f16 : SrcRegOrImm9_t16 <"OPERAND_REG_INLINE_C_FP16">;
 //===----------------------------------------------------------------------===//
 //  VISrc_* Operands with a VGPR or an inline constant
 //===----------------------------------------------------------------------===//
 
-def VISrc_64_bf16 : SrcRegOrImm9 ;
-def VISrc_64_f16 : SrcRegOrImm9 ;
-def VISrc_64_b32 : SrcRegOrImm9 ;
-def VISrc_64_f64 : SrcRegOrImm9 ;
-def VISrc_128_bf16 : SrcRegOrImm9 ;
-def VISrc_128_f16 : SrcRegOrImm9 ;
-def VISrc_128_b32 : SrcRegOrImm9 ;
-def VISrc_128_f32 : SrcRegOrImm9 ;
-def VISrc_256_b32 : SrcRegOrImm9 ;
-def VISrc_256_f32 : SrcRegOrImm9 ;
-def VISrc_256_f64 : SrcRegOrImm9 ;
-def VISrc_512_b32 : SrcRegOrImm9 ;
-def VISrc_512_f32 : SrcRegOrImm9 ;
-def VISrc_1024_b32 : SrcRegOrImm9 ;
-def VISrc_1024_f32 : SrcRegOrImm9 ;
+def VISrc_64_bf16 : SrcRegOrImm9 ;
+def VISrc_64_f16 : SrcRegOrImm9 ;
+def VISrc_64_b32 : SrcRegOrImm9 ;
+def VISrc_64_f64 : SrcRegOrImm9 ;
+def VISrc_128_bf16 : SrcRegOrImm9 ;
+def VISrc_128_f16 : SrcRegOrImm9 ;
+def VISrc_128_b32 : SrcRegOrImm9 ;
+def VISrc_128_f32 : SrcRegOrImm9 ;
+def VISrc_256_b32 : SrcRegOrImm9 ;
+def VISrc_256_f32 : SrcRegOrImm9 ;
+def VISrc_256_f64 : SrcRegOrImm9 ;
+def VISrc_512_b32 : SrcRegOrImm9 ;
+def VISrc_512_f32 : SrcRegOrImm9 ;
+def VISrc_1024_b32 : SrcRegOrImm9 ;
+def VISrc_1024_f32 : SrcRegOrImm9 ;
 
 //===----------------------------------------------------------------------===//
 //  AVSrc_*, AVDst_*, AVLdSt_* Operands with an AGPR or VGPR
 //===----------------------------------------------------------------------===//
 
-class AVSrcOperand
-  : AVOperand;
+class AVSrcOperand
+  : AVOperand;
 
-def AVSrc_32 : AVSrcOperand;
-def AVSrc_64 : AVSrcOperand;
-def AVSrc_128 : AVSrcOperand;
-def AVSrc_192 : AVSrcOperand;
-def AVSrc_256 : AVSrcOperand;
+def AVSrc_32 : AVSrcOperand;
+def AVSrc_64 : AVSrcOperand;
+def AVSrc_128 : AVSrcOperand;
+def AVSrc_192 : AVSrcOperand;
+def AVSrc_256 : AVSrcOperand;
 
-class AVDstOperand
-  : AVOperand;
+class AVDstOperand
+  : AVOperand;
 
-def AVDst_128 : AVDstOperand;
-def AVDst_256 : AVDstOperand;
-def AVDst_512 : AVDstOperand;
+def AVDst_128 : AVDstOperand;
+def AVDst_256 : AVDstOperand;
+def AVDst_512 : AVDstOperand;
 
-class AVLdStOperand
-  : AVOperand;
+class AVLdStOperand
+  : AVOperand;
 
-def AVLdSt_32 : AVLdStOperand;
-def AVLdSt_64 : AVLdStOperand;
-def AVLdSt_96 : AVLdStOperand;
-def AVLdSt_128 : AVLdStOperand;
-def AVLdSt_160 : AVLdStOperand;
-def AVLdSt_1024 : AVLdStOperand;
+def AVLdSt_32 : AVLdStOperand;
+def AVLdSt_64 : AVLdStOperand;
+def AVLdSt_96 : AVLdStOperand;
+def AVLdSt_128 : AVLdStOperand;
+def AVLdSt_160 : AVLdStOperand;
+def AVLdSt_1024 : AVLdStOperand;
 
 //===----------------------------------------------------------------------===//
 //  ACSrc_* Operands with an AGPR or an inline constant
 //===----------------------------------------------------------------------===//
 
-class SrcRegOrImmA9 <RegisterClass regClass, string opWidth, string operandType>
+class SrcRegOrImmA9 <RegisterClass regClass, string operandType>
   : RegOrImmOperand <regClass, operandType> {
-  let DecoderMethod = "decodeSrcRegOrImmA9<AMDGPUDisassembler::" # opWidth # ">";
+  let DecoderMethod = "decodeSrcRegOrImmA9<" # regClass.Size # ">";
 }
 
-def AISrc_64_f64 : SrcRegOrImmA9 ;
-def AISrc_128_f32 : SrcRegOrImmA9 ;
-def AISrc_128_b32 : SrcRegOrImmA9 ;
-def AISrc_256_f64 : SrcRegOrImmA9 ;
-def AISrc_512_f32 : SrcRegOrImmA9 ;
-def AISrc_512_b32 : SrcRegOrImmA9 ;
-def AISrc_1024_f32 : SrcRegOrImmA9 ;
-def AISrc_1024_b32 : SrcRegOrImmA9 ;
+def AISrc_64_f64 : SrcRegOrImmA9 ;
+def AISrc_128_f32 : SrcRegOrImmA9 ;
+def AISrc_128_b32 : SrcRegOrImmA9 ;
+def AISrc_256_f64 : SrcRegOrImmA9 ;
+def AISrc_512_f32 : SrcRegOrImmA9 ;
+def AISrc_512_b32 : SrcRegOrImmA9 ;
+def AISrc_1024_f32 : SrcRegOrImmA9 ;
+def AISrc_1024_b32 : SrcRegOrImmA9 ;
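
Editor's note (not part of the patch): the following is a minimal standalone C++ sketch of the pattern this change applies — operand widths become plain unsigned bit widths that flow from TableGen's regClass.Size into width-templated decoder thunks, replacing the old OpWidthTy enumerators. All names in the sketch (RegClassID, getVgprClassId, decodeSrc) are illustrative stand-ins, not the real LLVM API.

#include <cstdio>

// Illustrative stand-ins for the real register class IDs.
enum RegClassID : unsigned { VGPR_32, VReg_64, VReg_96, VReg_128, InvalidClass };

// Width is a plain bit width (16, 32, 64, ...) rather than an OPWxx enumerator.
static RegClassID getVgprClassId(unsigned Width) {
  switch (Width) {
  case 16:
  case 32:
    return VGPR_32;
  case 64:
    return VReg_64;
  case 96:
    return VReg_96;
  case 128:
    return VReg_128;
  }
  return InvalidClass; // the patch uses llvm_unreachable() here
}

// Mirrors the decoder thunks: the width travels as a template argument, e.g.
// decodeSrc<64>(), which TableGen now derives from regClass.Size when it
// builds the DecoderMethod string ("decodeSrcRegOrImm9<64>" and so on).
template <unsigned OpWidth> static RegClassID decodeSrc() {
  return getVgprClassId(OpWidth);
}

int main() {
  std::printf("%u %u\n", static_cast<unsigned>(decodeSrc<32>()),
              static_cast<unsigned>(decodeSrc<64>()));
  return 0;
}

In the real disassembler the width is forwarded to decodeSrcOp()/decodeNonVGPRSrcOp(), which pick between the VGPR, AGPR, SGPR and TTMP class-ID switches shown in the patch above; the sketch only models the VGPR path.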