diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index e2d62a9a2cce6..cec35d1147bb0 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -485,14 +485,14 @@ class MUBUF_Load_Pseudo : Pat < +class MUBUF_Offset_Load_Pat : GCNPat < (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset))), (load_vt (inst v4i32:$srsrc, i32:$soffset, i32:$offset)) >; class MUBUF_Addr64_Load_Pat : Pat < + SDPatternOperator ld = null_frag> : GCNPat < (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset))), (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i32:$offset)) >; @@ -572,18 +572,25 @@ class MUBUF_Store_Pseudo { + + def _OFFSET : GCNPat < + (st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset)), + (!cast(BaseInst # _OFFSET) store_vt:$vdata, v4i32:$srsrc, i32:$soffset, i32:$offset)>; + + def _ADDR64 : GCNPat < + (st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset)), + (!cast(BaseInst # _ADDR64) store_vt:$vdata, i64:$vaddr, v4i32:$srsrc, i32:$soffset, i32:$offset)>; +} + multiclass MUBUF_Pseudo_Stores_Helper { + bit isTFE> { defvar legal_store_vt = !if(!eq(store_vt, v3f16), v4f16, store_vt); - def _OFFSET : MUBUF_Store_Pseudo , + def _OFFSET : MUBUF_Store_Pseudo , MUBUFAddr64Table<0, NAME>; - def _ADDR64 : MUBUF_Store_Pseudo , + def _ADDR64 : MUBUF_Store_Pseudo , MUBUFAddr64Table<1, NAME>; def _OFFEN : MUBUF_Store_Pseudo ; @@ -598,10 +605,9 @@ multiclass MUBUF_Pseudo_Stores_Helper { - defm NAME : MUBUF_Pseudo_Stores_Helper; - defm _TFE : MUBUF_Pseudo_Stores_Helper; +multiclass MUBUF_Pseudo_Stores { + defm NAME : MUBUF_Pseudo_Stores_Helper; + defm _TFE : MUBUF_Pseudo_Stores_Helper; } class MUBUF_Pseudo_Store_Lds @@ -912,23 +918,31 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>; defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores < - "buffer_store_byte", i32, truncstorei8_global + "buffer_store_byte", i32 >; defm BUFFER_STORE_SHORT : MUBUF_Pseudo_Stores < - "buffer_store_short", i32, truncstorei16_global + "buffer_store_short", i32 >; defm BUFFER_STORE_DWORD : MUBUF_Pseudo_Stores < - "buffer_store_dword", i32, store_global + "buffer_store_dword", i32 >; defm BUFFER_STORE_DWORDX2 : MUBUF_Pseudo_Stores < - "buffer_store_dwordx2", v2i32, store_global + "buffer_store_dwordx2", v2i32 >; defm BUFFER_STORE_DWORDX3 : MUBUF_Pseudo_Stores < - "buffer_store_dwordx3", v3i32, store_global + "buffer_store_dwordx3", v3i32 >; defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores < - "buffer_store_dwordx4", v4i32, store_global + "buffer_store_dwordx4", v4i32 >; + +defm : MUBUF_Pseudo_Store_Pats<"BUFFER_STORE_BYTE", i32, truncstorei8_global>; +defm : MUBUF_Pseudo_Store_Pats<"BUFFER_STORE_SHORT", i32, truncstorei16_global>; +defm : MUBUF_Pseudo_Store_Pats<"BUFFER_STORE_DWORD", i32, store_global>; +defm : MUBUF_Pseudo_Store_Pats<"BUFFER_STORE_DWORDX2", v2i32, store_global>; +defm : MUBUF_Pseudo_Store_Pats<"BUFFER_STORE_DWORDX3", v3i32, store_global>; +defm : MUBUF_Pseudo_Store_Pats<"BUFFER_STORE_DWORDX4", v4i32, store_global>; + defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics < "buffer_atomic_swap", VGPR_32, i32 >; @@ -1617,69 +1631,73 @@ let SubtargetPredicate = isGFX90APlus in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">; } // End SubtargetPredicate = isGFX90APlus -foreach RtnMode = ["ret", "noret"] in { - -defvar Op = !cast(SIbuffer_atomic_cmpswap - # !if(!eq(RtnMode, "ret"), "", "_noret")); -defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); -defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy), - (timm:$cachepolicy)); - -defvar OffsetResDag = (!cast("BUFFER_ATOMIC_CMPSWAP_OFFSET" # InstSuffix) - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy); -def : GCNPat< - (Op - i32:$data, i32:$cmp, v4i32:$rsrc, 0, 0, i32:$soffset, - timm:$offset, timm:$cachepolicy, 0), - !if(!eq(RtnMode, "ret"), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffsetResDag, VReg_64)), sub0), - OffsetResDag) ->; - -defvar IdxenResDag = (!cast("BUFFER_ATOMIC_CMPSWAP_IDXEN" # InstSuffix) - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, - CachePolicy); -def : GCNPat< - (Op - i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, - 0, i32:$soffset, timm:$offset, - timm:$cachepolicy, timm), - !if(!eq(RtnMode, "ret"), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS IdxenResDag, VReg_64)), sub0), - IdxenResDag) ->; - -defvar OffenResDag = (!cast("BUFFER_ATOMIC_CMPSWAP_OFFEN" # InstSuffix) - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, - CachePolicy); -def : GCNPat< - (Op - i32:$data, i32:$cmp, v4i32:$rsrc, 0, - i32:$voffset, i32:$soffset, timm:$offset, - timm:$cachepolicy, 0), - !if(!eq(RtnMode, "ret"), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffenResDag, VReg_64)), sub0), - OffenResDag) ->; - -defvar BothenResDag = (!cast("BUFFER_ATOMIC_CMPSWAP_BOTHEN" # InstSuffix) - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy); -def : GCNPat< - (Op - i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, - i32:$voffset, i32:$soffset, timm:$offset, - timm:$cachepolicy, timm), - !if(!eq(RtnMode, "ret"), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS BothenResDag, VReg_64)), sub0), - BothenResDag) ->; - -} // end foreach RtnMode +multiclass SIBufferAtomicCmpSwapPat { + + foreach RtnMode = ["ret", "noret"] in { + + defvar Op = !cast(SIbuffer_atomic_cmpswap + # !if(!eq(RtnMode, "ret"), "", "_noret")); + defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); + defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy), + (timm:$cachepolicy)); + + defvar OffsetResDag = (!cast(Inst # "_OFFSET" # InstSuffix) + (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy); + def : GCNPat< + (Op + i32:$data, i32:$cmp, v4i32:$rsrc, 0, 0, i32:$soffset, + timm:$offset, timm:$cachepolicy, 0), + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffsetResDag, VReg_64)), sub0), + OffsetResDag) + >; + + defvar IdxenResDag = (!cast(Inst # "_IDXEN" # InstSuffix) + (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, + CachePolicy); + def : GCNPat< + (Op + i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS IdxenResDag, VReg_64)), sub0), + IdxenResDag) + >; + + defvar OffenResDag = (!cast(Inst # "_OFFEN" # InstSuffix) + (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, + CachePolicy); + def : GCNPat< + (Op + i32:$data, i32:$cmp, v4i32:$rsrc, 0, + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffenResDag, VReg_64)), sub0), + OffenResDag) + >; + + defvar BothenResDag = (!cast(Inst # "_BOTHEN" # InstSuffix) + (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), + SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy); + def : GCNPat< + (Op + i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS BothenResDag, VReg_64)), sub0), + BothenResDag) + >; + } // end foreach RtnMode +} + +defm : SIBufferAtomicCmpSwapPat<"BUFFER_ATOMIC_CMPSWAP">; class MUBUFLoad_PatternADDR64 : GCNPat < @@ -1713,91 +1731,89 @@ defm : MUBUFLoad_Atomic_Pattern ; } // End SubtargetPredicate = isGFX6GFX7 -multiclass MUBUFLoad_Pattern { def : GCNPat < (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset))), - (Instr_OFFSET $srsrc, $soffset, $offset) + (!cast(Instr # "_OFFSET") $srsrc, $soffset, $offset) >; } let OtherPredicates = [Has16BitInsts] in { -defm : MUBUFLoad_Pattern ; -defm : MUBUFLoad_Pattern ; -defm : MUBUFLoad_Pattern ; -defm : MUBUFLoad_Pattern ; -defm : MUBUFLoad_Pattern ; -defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_SBYTE", i16, sextloadi8_constant>; +defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_UBYTE", i16, extloadi8_constant>; +defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_UBYTE", i16, zextloadi8_constant>; +defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_SBYTE", i16, sextloadi8_global>; +defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_UBYTE", i16, extloadi8_global>; +defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_UBYTE", i16, zextloadi8_global>; -defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_USHORT", i16, load_global>; } // End OtherPredicates = [Has16BitInsts] -multiclass MUBUFScratchLoadPat { def : GCNPat < (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, i32:$offset))), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0) + (!cast(Instr # _OFFEN) $vaddr, $srsrc, $soffset, $offset, 0, 0) >; def : GCNPat < (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, i32:$offset))), - (InstrOffset $srsrc, $soffset, $offset, 0, 0) + (!cast(Instr # _OFFSET) $srsrc, $soffset, $offset, 0, 0) >; } // XXX - Is it possible to have a complex pattern in a PatFrag? -multiclass MUBUFScratchLoadPat_D16 { def : GCNPat < (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, i32:$offset), vt:$in), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, $in) + (!cast(Instr # _OFFEN) $vaddr, $srsrc, $soffset, $offset, $in) >; def : GCNPat < (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, i32:$offset), vt:$in), - (InstrOffset $srsrc, $soffset, $offset, $in) + (!cast(Instr # _OFFSET) $srsrc, $soffset, $offset, $in) >; } let OtherPredicates = [DisableFlatScratch] in { -defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_SBYTE", i32, sextloadi8_private>; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_UBYTE", i32, extloadi8_private>; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_UBYTE", i32, zextloadi8_private>; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_SBYTE", i16, sextloadi8_private>; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_UBYTE", i16, extloadi8_private>; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_UBYTE", i16, zextloadi8_private>; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_SSHORT", i32, sextloadi16_private>; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_USHORT", i32, extloadi16_private>; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_USHORT", i32, zextloadi16_private>; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_USHORT", i16, load_private>; foreach vt = Reg32Types.types in { -defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_DWORD", vt, load_private>; } -defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_DWORDX2", v2i32, load_private>; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_DWORDX3", v3i32, load_private>; +defm : MUBUFScratchLoadPat <"BUFFER_LOAD_DWORDX4", v4i32, load_private>; let OtherPredicates = [D16PreservesUnusedBits, DisableFlatScratch] in { -defm : MUBUFScratchLoadPat_D16; -defm : MUBUFScratchLoadPat_D16; -defm : MUBUFScratchLoadPat_D16; -defm : MUBUFScratchLoadPat_D16; -defm : MUBUFScratchLoadPat_D16; -defm : MUBUFScratchLoadPat_D16; +defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SHORT_D16_HI", v2i16, load_d16_hi_private>; +defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_UBYTE_D16_HI", v2i16, az_extloadi8_d16_hi_private>; +defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SBYTE_D16_HI", v2i16, sextloadi8_d16_hi_private>; +defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SHORT_D16_HI", v2f16, load_d16_hi_private>; +defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_UBYTE_D16_HI", v2f16, az_extloadi8_d16_hi_private>; +defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SBYTE_D16_HI", v2f16, sextloadi8_d16_hi_private>; -defm : MUBUFScratchLoadPat_D16; -defm : MUBUFScratchLoadPat_D16; -defm : MUBUFScratchLoadPat_D16; -defm : MUBUFScratchLoadPat_D16; -defm : MUBUFScratchLoadPat_D16; -defm : MUBUFScratchLoadPat_D16; +defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SHORT_D16", v2i16, load_d16_lo_private>; +defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_UBYTE_D16", v2i16, az_extloadi8_d16_lo_private>; +defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SBYTE_D16", v2i16, sextloadi8_d16_lo_private>; +defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SHORT_D16", v2f16, load_d16_lo_private>; +defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_UBYTE_D16", v2f16, az_extloadi8_d16_lo_private>; +defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SBYTE_D16", v2f16, sextloadi8_d16_lo_private>; } } // End OtherPredicates = [DisableFlatScratch] @@ -1824,56 +1840,55 @@ defm : MUBUFStore_Atomic_Pattern { +multiclass MUBUFStore_PatternOffset { def : GCNPat < (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset)), - (Instr_OFFSET $vdata, $srsrc, $soffset, $offset) + (!cast(Instr # "_OFFSET") $vdata, $srsrc, $soffset, $offset) >; } -defm : MUBUFStore_Pattern ; -defm : MUBUFStore_Pattern ; +defm : MUBUFStore_PatternOffset <"BUFFER_STORE_BYTE", i16, truncstorei8_global>; +defm : MUBUFStore_PatternOffset <"BUFFER_STORE_SHORT", i16, store_global>; -multiclass MUBUFScratchStorePat { def : GCNPat < (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, i32:$offset)), - (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0) + (!cast(Instr # _OFFEN) rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0) >; def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, i32:$offset)), - (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0) + (!cast(Instr # _OFFSET) rc:$value, $srsrc, $soffset, $offset, 0, 0) >; } let OtherPredicates = [DisableFlatScratch] in { -defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat <"BUFFER_STORE_BYTE", i32, truncstorei8_private>; +defm : MUBUFScratchStorePat <"BUFFER_STORE_SHORT", i32, truncstorei16_private>; +defm : MUBUFScratchStorePat <"BUFFER_STORE_BYTE", i16, truncstorei8_private>; +defm : MUBUFScratchStorePat <"BUFFER_STORE_SHORT", i16, store_private>; foreach vt = Reg32Types.types in { -defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat <"BUFFER_STORE_DWORD", vt, store_private>; } -defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat <"BUFFER_STORE_DWORDX2", v2i32, store_private, VReg_64>; +defm : MUBUFScratchStorePat <"BUFFER_STORE_DWORDX3", v3i32, store_private, VReg_96>; +defm : MUBUFScratchStorePat <"BUFFER_STORE_DWORDX4", v4i32, store_private, VReg_128>; let OtherPredicates = [HasD16LoadStore, DisableFlatScratch] in { // Hiding the extract high pattern in the PatFrag seems to not // automatically increase the complexity. let AddedComplexity = 1 in { -defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat <"BUFFER_STORE_SHORT_D16_HI", i32, store_hi16_private>; +defm : MUBUFScratchStorePat <"BUFFER_STORE_BYTE_D16_HI", i32, truncstorei8_hi16_private>; } } } // End OtherPredicates = [DisableFlatScratch]