diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp index b78952ca3a622..43b3bf43fe56d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUGenSearchableTables.inc" #include "GCNSubtarget.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" @@ -214,12 +215,14 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass { RegisterUseCount[Unit]++; // Do not attempt to optimise across exec mask changes. - if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) { + if (MI.modifiesRegister(AMDGPU::EXEC, TRI) || + AMDGPU::isInvalidSingleUseConsumerInst(MI.getOpcode())) { for (auto &UsedReg : RegisterUseCount) UsedReg.second = 2; } - if (!SIInstrInfo::isVALU(MI)) + if (!SIInstrInfo::isVALU(MI) || + AMDGPU::isInvalidSingleUseProducerInst(MI.getOpcode())) continue; if (AllProducerOperandsAreSingleUse) { SingleUseProducerPositions.push_back({VALUInstrCount, &MI}); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 0ed2f60ea66a7..f2dd86ec4e711 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2266,6 +2266,8 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { field bit EnableClamp = _EnableClamp; field bit IsTrue16 = 0; field bit IsRealTrue16 = 0; + field bit IsInvalidSingleUseConsumer = 0; + field bit IsInvalidSingleUseProducer = 0; field ValueType DstVT = ArgVT[0]; field ValueType Src0VT = ArgVT[1]; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 2beaf903542bd..7b841888cba1d 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -372,10 +372,18 @@ struct VOPTrue16Info { bool IsTrue16; 
}; +struct SingleUseExceptionInfo { + uint16_t Opcode; + bool IsInvalidSingleUseConsumer; + bool IsInvalidSingleUseProducer; +}; + #define GET_MTBUFInfoTable_DECL #define GET_MTBUFInfoTable_IMPL #define GET_MUBUFInfoTable_DECL #define GET_MUBUFInfoTable_IMPL +#define GET_SingleUseExceptionTable_DECL +#define GET_SingleUseExceptionTable_IMPL #define GET_SMInfoTable_DECL #define GET_SMInfoTable_IMPL #define GET_VOP1InfoTable_DECL @@ -607,6 +615,16 @@ bool isTrue16Inst(unsigned Opc) { return Info ? Info->IsTrue16 : false; } +bool isInvalidSingleUseConsumerInst(unsigned Opc) { + const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc); + return Info && Info->IsInvalidSingleUseConsumer; +} + +bool isInvalidSingleUseProducerInst(unsigned Opc) { + const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc); + return Info && Info->IsInvalidSingleUseProducer; +} + unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) { const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc); return Info ? 
Info->Opcode3Addr : ~0u; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index fc4147df76e3e..7478ef0ffad86 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -854,6 +854,12 @@ getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo); LLVM_READONLY bool isTrue16Inst(unsigned Opc); +LLVM_READONLY +bool isInvalidSingleUseConsumerInst(unsigned Opc); + +LLVM_READONLY +bool isInvalidSingleUseProducerInst(unsigned Opc); + LLVM_READONLY unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 012dca22eb4fe..12ee8e12ec5e3 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -246,6 +246,7 @@ def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32", VOP_READFIRSTLANE, getVOP1Pat.ret, 1> { let isConvergent = 1; + let IsInvalidSingleUseConsumer = 1; } let isReMaterializable = 1 in { @@ -356,6 +357,7 @@ defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT>; def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> { let Src0RC32 = VRegSrc_32; let Src0RC64 = VRegSrc_32; + let IsInvalidSingleUseConsumer = 1; } // Special case because there are no true output operands. 
Hack vdst @@ -399,8 +401,12 @@ class VOP_MOVREL : VOPProfile<[untyped, i32, untyped, un let EmitDst = 1; // force vdst emission } -def VOP_MOVRELD : VOP_MOVREL; -def VOP_MOVRELSD : VOP_MOVREL; +let IsInvalidSingleUseProducer = 1 in { + def VOP_MOVRELD : VOP_MOVREL; + def VOP_MOVRELSD : VOP_MOVREL { + let IsInvalidSingleUseConsumer = 1; + } +} let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in { // v_movreld_b32 is a special case because the destination output @@ -529,6 +535,7 @@ let SubtargetPredicate = isGFX9Plus in { let Constraints = "$vdst = $src1, $vdst1 = $src0"; let DisableEncoding = "$vdst1,$src1"; let SchedRW = [Write64Bit, Write64Bit]; + let IsInvalidSingleUseConsumer = 1; } let isReMaterializable = 1 in @@ -693,6 +700,8 @@ let SubtargetPredicate = isGFX10Plus in { let Constraints = "$vdst = $src1, $vdst1 = $src0"; let DisableEncoding = "$vdst1,$src1"; let SchedRW = [Write64Bit, Write64Bit]; + let IsInvalidSingleUseConsumer = 1; + let IsInvalidSingleUseProducer = 1; } } // End Uses = [M0] } // End SubtargetPredicate = isGFX10Plus @@ -714,7 +723,10 @@ let SubtargetPredicate = isGFX11Plus in { def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS, getVOP1Pat.ret, - /*VOP1Only=*/ 1>; + /*VOP1Only=*/ 1> { + let IsInvalidSingleUseConsumer = 1; + let IsInvalidSingleUseProducer = 1; + } defm V_MOV_B16_t16 : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16>; defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>; defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index d2af1753d5503..4d3411d16a383 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -779,12 +779,14 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, } // End isCommutable = 1 // These are special and do not read the exec mask. 
-let isConvergent = 1, Uses = [] in { +let isConvergent = 1, Uses = [], IsInvalidSingleUseConsumer = 1 in { def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>; let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, - [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>; + [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]> { + let IsInvalidSingleUseProducer = 1; + } } // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in } // End isConvergent = 1 diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 616bc7684753e..fd5f1b71ce331 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -155,12 +155,12 @@ defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile, fmaxnum_l } // End SubtargetPredicate = isNotGFX12Plus } // End SchedRW = [WriteDoubleAdd] -let SchedRW = [WriteIntMul] in { +let SchedRW = [WriteIntMul], IsInvalidSingleUseConsumer = 1 in { defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", V_MUL_PROF, DivergentBinFrag>; defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", V_MUL_PROF, mulhu>; defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF>; defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF, mulhs>; -} // End SchedRW = [WriteIntMul] +} // End SchedRW = [WriteIntMul], IsInvalidSingleUseConsumer = 1 let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile, DivergentBinFrag>; @@ -258,9 +258,9 @@ let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it d let isReMaterializable = 1 in defm V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile>; -let Constraints = "@earlyclobber $vdst" in { +let Constraints = "@earlyclobber $vdst", 
IsInvalidSingleUseConsumer = 1 in { defm V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile>; -} // End Constraints = "@earlyclobber $vdst" +} // End Constraints = "@earlyclobber $vdst", IsInvalidSingleUseConsumer = 1 let isReMaterializable = 1 in { @@ -275,14 +277,16 @@ let SchedRW = [Write64Bit] in { defm V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile, csra_64>; } // End SubtargetPredicate = isGFX6GFX7 + let IsInvalidSingleUseConsumer = 1 in { let SubtargetPredicate = isGFX8Plus in { defm V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile, clshr_rev_64>; defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile, cashr_rev_64>; - } // End SubtargetPredicate = isGFX8Plus + } // End SubtargetPredicate = isGFX8Plus, IsInvalidSingleUseConsumer = 1 let SubtargetPredicate = isGFX8GFX9GFX10GFX11 in { defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile, clshl_rev_64>; } // End SubtargetPredicate = isGFX8GFX9GFX10GFX11 + } // End IsInvalidSingleUseConsumer = 1 } // End SchedRW = [Write64Bit] } // End isReMaterializable = 1 @@ -307,14 +309,14 @@ def VOPProfileMQSAD : VOP3_Profile { let HasModifiers = 0; } -let SubtargetPredicate = isGFX7Plus in { +let SubtargetPredicate = isGFX7Plus, IsInvalidSingleUseConsumer = 1 in { let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in { defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile>; defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>; } // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] -} // End SubtargetPredicate = isGFX7Plus +} // End SubtargetPredicate = isGFX7Plus, IsInvalidSingleUseConsumer = 1 -let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in { +let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU], IsInvalidSingleUseConsumer = 1 in { let SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug] in { defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; defm 
V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; @@ -324,7 +326,7 @@ let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in { defm V_MAD_U64_U32_gfx11 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; defm V_MAD_I64_I32_gfx11 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; } -} // End isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] +} // End isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU], IsInvalidSingleUseConsumer = 1 let FPDPRounding = 1 in { @@ -859,10 +861,10 @@ let SubtargetPredicate = isGFX10Plus in { } // End isCommutable = 1, isReMaterializable = 1 def : ThreeOp_i32_Pats; - let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { + let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in", IsInvalidSingleUseConsumer = 1, IsInvalidSingleUseProducer = 1 in { defm V_PERMLANE16_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile>; defm V_PERMLANEX16_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile>; - } // End $vdst = $vdst_in, DisableEncoding $vdst_in + } // End $vdst = $vdst_in, DisableEncoding $vdst_in, IsInvalidSingleUseConsumer = 1, IsInvalidSingleUseProducer = 1 def : PermlanePat; def : PermlanePat; @@ -1275,11 +1277,12 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { } } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" -defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>; - -let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { - defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx10<0x361>; -} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) +let IsInvalidSingleUseConsumer = 1 in { + defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>; + let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in), IsInvalidSingleUseProducer = 1 in { + defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx10<0x361>; + } // End InOperandList = (ins 
SSrcOrLds_b32:$src0, SCSrc_b32: $src1, VGPR_32:$vdst_in), IsInvalidSingleUseProducer = 1 +} // End IsInvalidSingleUseConsumer = 1 let SubtargetPredicate = isGFX10Before1030 in { defm V_MUL_LO_I32 : VOP3_Real_gfx10<0x16b>; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index c3bdbbfc38462..310ad3d731f1d 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -382,15 +382,19 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", AMDGPUfdot2, 1/*ExplicitClamp*/>; let OtherPredicates = [HasDot7Insts] in { -defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", - VOP3P_Profile, int_amdgcn_udot4, 1>; +let IsInvalidSingleUseConsumer = 1 in { + defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", + VOP3P_Profile, int_amdgcn_udot4, 1>; +} defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3P_Profile, int_amdgcn_udot8, 1>; } // End OtherPredicates = [HasDot7Insts] let OtherPredicates = [HasDot1Insts] in { -defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", - VOP3P_Profile, int_amdgcn_sdot4, 1>; +let IsInvalidSingleUseConsumer = 1 in { + defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", + VOP3P_Profile, int_amdgcn_sdot4, 1>; +} defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3P_Profile, int_amdgcn_sdot8, 1>; } // End OtherPredicates = [HasDot1Insts] diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index ddd6d8b074aa3..33dddf850ad21 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -435,8 +435,10 @@ multiclass VOPC_I16 : VOPC_Pseudos ; -multiclass VOPC_I64 : - VOPC_Pseudos ; +let IsInvalidSingleUseConsumer = 1 in { + multiclass VOPC_I64 : + VOPC_Pseudos ; +} multiclass VOPCX_F16 { let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in { @@ -465,8 +467,10 @@ multiclass VOPCX_I16 { multiclass VOPCX_I32 : VOPCX_Pseudos ; -multiclass VOPCX_I64 : - 
VOPCX_Pseudos ; +let IsInvalidSingleUseConsumer = 1 in { + multiclass VOPCX_I64 : + VOPCX_Pseudos ; +} //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index f45ab9bf46db1..c8f794322b677 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -17,6 +17,8 @@ class LetDummies { bit isReMaterializable; bit isAsCheapAsAMove; bit FPDPRounding; + bit IsInvalidSingleUseConsumer; + bit IsInvalidSingleUseProducer; Predicate SubtargetPredicate; string Constraints; string DisableEncoding; @@ -81,6 +83,8 @@ class VOP_Pseudo (NAME); bit IsTrue16 = P.IsTrue16; + bit IsInvalidSingleUseConsumer = P.IsInvalidSingleUseConsumer; + bit IsInvalidSingleUseProducer = P.IsInvalidSingleUseProducer; VOPProfile Pfl = P; string AsmOperands; @@ -175,6 +179,8 @@ class VOP3P_Pseudo pattern = []> : class VOP_Real { Instruction Opcode = !cast(NAME); bit IsSingle = ps.Pfl.IsSingle; + bit IsInvalidSingleUseConsumer = ps.Pfl.IsInvalidSingleUseConsumer; + bit IsInvalidSingleUseProducer = ps.Pfl.IsInvalidSingleUseProducer; } class VOP3_Real : @@ -819,9 +825,7 @@ class VOP3P_DPPe_Common op, VOPProfile P> : VOP3P_DPPe_Common_Base pattern=[], dag Ins = P.InsDPP, string asmOps = P.AsmDPP> : - InstSI , - VOP , - SIMCInstr { + VOP_Pseudo { let isPseudo = 1; let isCodeGenOnly = 1; @@ -853,6 +857,9 @@ class VOP_DPP_Pseudo pattern=[], let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, ""); let DecoderNamespace = "GFX8"; + let IsInvalidSingleUseConsumer = !not(VINTERP); + let IsInvalidSingleUseProducer = !not(VINTERP); + VOPProfile Pfl = P; } @@ -1719,3 +1726,12 @@ def VOPTrue16Table : GenericTable { let PrimaryKey = ["Opcode"]; let PrimaryKeyName = "getTrue16OpcodeHelper"; } + +def SingleUseExceptionTable : GenericTable { + let FilterClass = "VOP_Pseudo"; + let CppTypeName = "SingleUseExceptionInfo"; + let Fields = ["Opcode", 
"IsInvalidSingleUseConsumer", "IsInvalidSingleUseProducer"]; + + let PrimaryKey = ["Opcode"]; + let PrimaryKeyName = "getSingleUseExceptionHelper"; +} diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir index f2a5139b73b10..9e65ce329df43 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s # One single-use producer. --- @@ -38,14 +37,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_SINGLEUSE_VDST 1 ; CHECK-NEXT: $vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec - ; CHECK-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec + ; CHECK-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e64 $vgpr2_vgpr3, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: liveins: $vgpr4_vgpr5 bb.0: liveins: $vgpr0_vgpr1 $vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec - $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec + $vgpr4_vgpr5 = V_MOV_B64_e64 $vgpr2_vgpr3, implicit $exec bb.1: liveins: $vgpr4_vgpr5 ... @@ -1238,3 +1237,184 @@ body: | liveins: $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr30, $vgpr31, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36 ... + +# Tests for multi-cycle instructions that are explicitly excluded. + +# Valid producers but invalid consumer opcodes. 
+--- +name: v_mul_hi_u32_e64 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: v_mul_hi_u32_e64 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; CHECK-NEXT: S_SINGLEUSE_VDST 1 + ; CHECK-NEXT: $vgpr2 = V_MUL_HI_U32_e64 $vgpr0, $vgpr1, implicit $exec + ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $vgpr0, $vgpr3 + bb.0: + liveins: $vgpr0 + $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr2 = V_MUL_HI_U32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec + bb.1: + liveins: $vgpr0, $vgpr3 +... + +--- +name: v_cmpx_t_u64_e64 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: v_cmpx_t_u64_e64 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; CHECK-NEXT: S_SINGLEUSE_VDST 1 + ; CHECK-NEXT: $sgpr0 = V_CMPX_EQ_U64_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit-def $exec, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $vgpr0 + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + $sgpr0 = V_CMPX_EQ_U64_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit-def $exec, implicit $exec + S_BRANCH %bb.1 + bb.1: + liveins: $vgpr0 +... 
+ +--- +name: v_lshlrev_b64_e64 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: v_lshlrev_b64_e64 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e64 $vgpr0_vgpr1, implicit $exec + ; CHECK-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec + ; CHECK-NEXT: S_SINGLEUSE_VDST 1 + ; CHECK-NEXT: $vgpr6_vgpr7 = V_LSHLREV_B64_e64 0, $vgpr4_vgpr5, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $vgpr4_vgpr5 + bb.0: + liveins: $vgpr0_vgpr1 + $vgpr2_vgpr3 = V_MOV_B64_e64 $vgpr0_vgpr1, implicit $exec + $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec + $vgpr6_vgpr7 = V_LSHLREV_B64_e64 0, $vgpr4_vgpr5, implicit $exec + bb.1: + liveins: $vgpr4_vgpr5 +... + +# Invalid producers but valid consumer opcodes. +--- +name: v_movreld_b32_e32 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: v_movreld_b32_e32 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $m0 = S_MOV_B32 0 + ; CHECK-NEXT: S_SINGLEUSE_VDST 1 + ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; CHECK-NEXT: V_MOVRELD_B32_e32 $vgpr2, $vgpr1, implicit $m0, implicit $exec, implicit-def $vgpr1_vgpr2, implicit undef $vgpr1_vgpr2(tied-def 4) + ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr1, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $vgpr3 + bb.0: + liveins: $vgpr0, $vgpr2 + $m0 = S_MOV_B32 0 + $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + V_MOVRELD_B32_e32 $vgpr2, $vgpr1, implicit $m0, implicit $exec, implicit-def $vgpr1_vgpr2, implicit undef $vgpr1_vgpr2(tied-def 4) + $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr1, implicit $exec + bb.1: + liveins: $vgpr3 +... + +# Invalid producers and invalid consumer opcodes. 
+--- +name: v_writelane_b32 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: v_writelane_b32 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr0, 0, $vgpr1 + ; CHECK-NEXT: S_SINGLEUSE_VDST 1 + ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $vgpr0 + bb.0: + liveins: $vgpr0, $sgpr0 + $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr1 = V_WRITELANE_B32 $sgpr0, 0, $vgpr1 + $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec + bb.1: + liveins: $vgpr0 +... + +# DPP instructions cannot be single use producers or consumers +--- +name: V_ADD_NC_U32_dpp +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: V_ADD_NC_U32_dpp + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $vgpr0 + bb.0: + liveins: $vgpr0, $vcc + $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec + $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec + $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec + bb.1: + liveins: $vgpr0 +... 
+ +# Exception to the rule that dpp instructions +# cannot be single use producers or consumers +--- +name: V_INTERP_MOV_F32 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: V_INTERP_MOV_F32 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_SINGLEUSE_VDST 1 + ; CHECK-NEXT: $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $mode, implicit $m0, implicit $exec + ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $vgpr1 + bb.0: + $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + bb.1: + liveins: $vgpr1 +... +