diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index b919c116445c8..10958f0bf97b4 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -437,6 +437,10 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
   if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
     markSuperRegs(Reserved, AArch64::W16);
 
+  // FFR is modelled as global state that cannot be allocated.
+  if (MF.getSubtarget<AArch64Subtarget>().hasSVE())
+    Reserved.set(AArch64::FFR);
+
   // SME tiles are not allocatable.
   if (MF.getSubtarget<AArch64Subtarget>().hasSME()) {
     for (MCPhysReg SubReg : subregs_inclusive(AArch64::ZA))
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 2b0524754b2e9..7c98f934a1317 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -488,11 +488,11 @@ def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AArch64fmin_p>;
 
 let Predicates = [HasSVE] in {
-  defm RDFFR_PPz  : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
-  def  RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
-  defm RDFFR_P    : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
-  def  SETFFR     : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
-  def  WRFFR      : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
+  def  RDFFR_PPz  : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
+  def  RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
+  def  RDFFR_P    : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
+  def  SETFFR     : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
+  def  WRFFR      : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
 } // End HasSVE
 
 let Predicates = [HasSVEorSME] in {
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 5e36b6f4d34a2..68343674bc819 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -1198,24 +1198,24 @@ def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instregex "^LDNT1[BHWD]_ZRI$")>;
 def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instregex "^LDNT1[BHWD]_ZRR$")>;
 
 // Non temporal gather load, vector + scalar 32-bit element size
-def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
-                                                                       "^LDNT1S[BH]_ZZR_S_REAL$")>;
+def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^LDNT1[BHW]_ZZR_S$",
+                                                                       "^LDNT1S[BH]_ZZR_S$")>;
 
 // Non temporal gather load, vector + scalar 64-bit element size
-def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
-def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instrs LDNT1D_ZZR_D_REAL)>;
+def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
+def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instrs LDNT1D_ZZR_D)>;
 
 // Contiguous first faulting load, scalar + scalar
-def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDFF1[BHWD]_REAL$",
-                                                                 "^LDFF1S?B_[HSD]_REAL$",
-                                                                 "^LDFF1S?H_[SD]_REAL$",
-                                                                 "^LDFF1S?W_D_REAL$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDFF1[BHWD]$",
+                                                                 "^LDFF1S?B_[HSD]$",
+                                                                 "^LDFF1S?H_[SD]$",
+                                                                 "^LDFF1S?W_D$")>;
 
 // Contiguous non faulting load, scalar + imm
-def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDNF1[BHWD]_IMM_REAL$",
-                                                                 "^LDNF1S?B_[HSD]_IMM_REAL$",
"^LDNF1S?H_[SD]_IMM_REAL$", - "^LDNF1S?W_D_IMM_REAL$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDNF1[BHWD]_IMM$", + "^LDNF1S?B_[HSD]_IMM$", + "^LDNF1S?H_[SD]_IMM$", + "^LDNF1S?W_D_IMM$")>; // Contiguous Load two structures to two vectors, scalar + imm def : InstRW<[CortexA510MCWrite<3, 1, CortexA510UnitLdSt>], (instregex "^LD2[BHWD]_IMM$")>; @@ -1236,28 +1236,28 @@ def : InstRW<[CortexA510MCWrite<5, 3, CortexA510UnitLdSt>], (instregex "^LD4[BHW def : InstRW<[CortexA510MCWrite<5, 3, CortexA510UnitLdSt>], (instregex "^LD4[BHWD]$")>; // Gather load, vector + imm, 32-bit element size -def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$", - "^GLD(FF)?1W_IMM_REAL$")>; +def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BH]_S_IMM$", + "^GLD(FF)?1W_IMM$")>; // Gather load, vector + imm, 64-bit element size -def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$", - "^GLD(FF)?1D_IMM_REAL$")>; +def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$", + "^GLD(FF)?1D_IMM$")>; // Gather load, 64-bit element size def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], - (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$", - "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$", - "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$", - "^GLD(FF)?1D_(SCALED_)?REAL$")>; + (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$", + "^GLD(FF)?1S?[BHW]_D(_SCALED)?$", + "^GLD(FF)?1D_[SU]XTW(_SCALED)?$", + "^GLD(FF)?1D(_SCALED)?$")>; // Gather load, 32-bit scaled offset def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>], - (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$", - "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>; + (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$", + "^GLD(FF)?1W_[SU]XTW_SCALED")>; // Gather load, 32-bit unpacked unscaled offset -def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$", - "^GLD(FF)?1W_[SU]XTW_REAL$")>; +def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$", + "^GLD(FF)?1W_[SU]XTW$")>; def : InstRW<[CortexA510Write<0, CortexA510UnitVALU>], (instregex "^PRF(B|H|W|D).*")>; // SVE Store instructions @@ -1357,10 +1357,10 @@ def : InstRW<[CortexA510VSt<8>], (instregex "^SST1[BHW]_D$", // ----------------------------------------------------------------------------- // Read first fault register, unpredicated -def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs RDFFR_P_REAL)>; +def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs RDFFR_P)>; // Read first fault register, predicated -def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFR_PPz_REAL)>; +def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFR_PPz)>; // Read first fault register and set flags def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFRS_PPz)>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td index 8ec124954362f..c18045e7c8f96 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td @@ -2110,24 +2110,24 @@ def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>; def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>; // Non temporal gather load, vector + scalar 32-bit element size -def : InstRW<[N2Write_9cyc_1L_1V], (instregex 
"^LDNT1[BHW]_ZZR_S_REAL$", - "^LDNT1S[BH]_ZZR_S_REAL$")>; +def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S$", + "^LDNT1S[BH]_ZZR_S$")>; // Non temporal gather load, vector + scalar 64-bit element size -def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>; -def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>; +def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D$")>; +def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D)>; // Contiguous first faulting load, scalar + scalar -def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$", - "^LDFF1S?B_[HSD]_REAL$", - "^LDFF1S?H_[SD]_REAL$", - "^LDFF1S?W_D_REAL$")>; +def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]$", + "^LDFF1S?B_[HSD]$", + "^LDFF1S?H_[SD]$", + "^LDFF1S?W_D$")>; // Contiguous non faulting load, scalar + imm -def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$", - "^LDNF1S?B_[HSD]_IMM_REAL$", - "^LDNF1S?H_[SD]_IMM_REAL$", - "^LDNF1S?W_D_IMM_REAL$")>; +def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM$", + "^LDNF1S?B_[HSD]_IMM$", + "^LDNF1S?H_[SD]_IMM$", + "^LDNF1S?W_D_IMM$")>; // Contiguous Load two structures to two vectors, scalar + imm def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>; @@ -2148,28 +2148,28 @@ def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>; def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>; // Gather load, vector + imm, 32-bit element size -def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$", - "^GLD(FF)?1W_IMM_REAL$")>; +def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$", + "^GLD(FF)?1W_IMM$")>; // Gather load, vector + imm, 64-bit element size -def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$", - "^GLD(FF)?1D_IMM_REAL$")>; +def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$", + "^GLD(FF)?1D_IMM$")>; // Gather load, 64-bit element size def : InstRW<[N2Write_9cyc_2L_2V], - (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$", - "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$", - "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$", - "^GLD(FF)?1D_(SCALED_)?REAL$")>; + (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$", + "^GLD(FF)?1S?[BHW]_D(_SCALED)?$", + "^GLD(FF)?1D_[SU]XTW(_SCALED)?$", + "^GLD(FF)?1D(_SCALED)?$")>; // Gather load, 32-bit scaled offset def : InstRW<[N2Write_10cyc_2L_2V], - (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$", - "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>; + (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$", + "^GLD(FF)?1W_[SU]XTW_SCALED")>; // Gather load, 32-bit unpacked unscaled offset -def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$", - "^GLD(FF)?1W_[SU]XTW_REAL$")>; +def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$", + "^GLD(FF)?1W_[SU]XTW$")>; // SVE Store instructions // ----------------------------------------------------------------------------- @@ -2268,10 +2268,10 @@ def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$", // ----------------------------------------------------------------------------- // Read first fault register, unpredicated -def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>; +def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P)>; // Read first fault register, predicated -def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>; +def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz)>; // Read first fault register and set flags def : 
 def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index 613db353cb0aa..e50a401f8b2ae 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -1714,17 +1714,17 @@ def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>;
 def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>;
 
 // Contiguous first faulting load, scalar + scalar
-def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H_REAL$",
-                                              "^LDFF1S?H_[SD]_REAL$")>;
-def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]_REAL$",
-                                              "^LDFF1S?B_[HSD]_REAL$",
-                                              "^LDFF1S?W_D_REAL$")>;
+def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H$",
+                                              "^LDFF1S?H_[SD]$")>;
+def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]$",
+                                              "^LDFF1S?B_[HSD]$",
+                                              "^LDFF1S?W_D$")>;
 
 // Contiguous non faulting load, scalar + imm
-def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM_REAL$",
-                                           "^LDNF1S?B_[HSD]_IMM_REAL$",
-                                           "^LDNF1S?H_[SD]_IMM_REAL$",
-                                           "^LDNF1S?W_D_IMM_REAL$")>;
+def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM$",
+                                           "^LDNF1S?B_[HSD]_IMM$",
+                                           "^LDNF1S?H_[SD]_IMM$",
+                                           "^LDNF1S?W_D_IMM$")>;
 
 // Contiguous Load two structures to two vectors, scalar + imm
 def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>;
@@ -1746,25 +1746,25 @@ def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>;
 def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>;
 
 // Gather load, vector + imm, 32-bit element size
-def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
-                                             "^GLD(FF)?1W_IMM_REAL$")>;
+def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+                                             "^GLD(FF)?1W_IMM$")>;
 
 // Gather load, vector + imm, 64-bit element size
 def : InstRW<[V1Write_9c_2L_2V],
-             (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
-                        "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?(SCALED_)?REAL$",
-                        "^GLD(FF)?1D_IMM_REAL$",
-                        "^GLD(FF)?1D_([SU]XTW_)?(SCALED_)?REAL$")>;
+             (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+                        "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?(_SCALED)?$",
+                        "^GLD(FF)?1D_IMM$",
+                        "^GLD(FF)?1D(_[SU]XTW)?(_SCALED)?$")>;
 
 // Gather load, 32-bit scaled offset
 def : InstRW<[V1Write_11c_2L_2V],
-             (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
-                        "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+             (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
+                        "^GLD(FF)?1W_[SU]XTW_SCALED")>;
 
 // Gather load, 32-bit unpacked unscaled offset
 def : InstRW<[V1Write_9c_1L_1V],
-             (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
-                        "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+             (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+                        "^GLD(FF)?1W_[SU]XTW$")>;
 
 // Prefetch
 // NOTE: This is not specified in the SOG.
@@ -1848,12 +1848,12 @@ def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
 // Read first fault register, unpredicated
 // Set first fault register
 // Write to first fault register
-def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P_REAL,
+def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P,
                                        SETFFR, WRFFR)>;
 
 // Read first fault register, predicated
-def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz_REAL)>;
+def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz)>;
 
 // Read first fault register and set flags
 def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index e7de40fdf1deb..807ce40bc5eac 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -2635,24 +2635,24 @@ def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
 def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>;
 
 // Non temporal gather load, vector + scalar 32-bit element size
-def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
-                                              "^LDNT1S[BH]_ZZR_S_REAL$")>;
+def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S$",
+                                              "^LDNT1S[BH]_ZZR_S$")>;
 
 // Non temporal gather load, vector + scalar 64-bit element size
-def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
-def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
+def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
+def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D)>;
 
 // Contiguous first faulting load, scalar + scalar
-def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
-                                              "^LDFF1S?B_[HSD]_REAL$",
-                                              "^LDFF1S?H_[SD]_REAL$",
-                                              "^LDFF1S?W_D_REAL$")>;
+def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]$",
+                                              "^LDFF1S?B_[HSD]$",
+                                              "^LDFF1S?H_[SD]$",
+                                              "^LDFF1S?W_D$")>;
 
 // Contiguous non faulting load, scalar + imm
-def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
-                                           "^LDNF1S?B_[HSD]_IMM_REAL$",
-                                           "^LDNF1S?H_[SD]_IMM_REAL$",
-                                           "^LDNF1S?W_D_IMM_REAL$")>;
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM$",
+                                           "^LDNF1S?B_[HSD]_IMM$",
+                                           "^LDNF1S?H_[SD]_IMM$",
+                                           "^LDNF1S?W_D_IMM$")>;
 
 // Contiguous Load two structures to two vectors, scalar + imm
 def : InstRW<[V2Write_8cyc_2L_2V], (instregex "^LD2[BHWD]_IMM$")>;
@@ -2673,33 +2673,33 @@ def : InstRW<[V2Write_9cyc_4L_8V], (instregex "^LD4[BHWD]_IMM$")>;
 def : InstRW<[V2Write_10cyc_4L_8V_4S], (instregex "^LD4[BHWD]$")>;
 
 // Gather load, vector + imm, 32-bit element size
-def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
-                                              "^GLD(FF)?1W_IMM_REAL$")>;
+def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+                                              "^GLD(FF)?1W_IMM$")>;
 
 // Gather load, vector + imm, 64-bit element size
-def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
-                                              "^GLD(FF)?1D_IMM_REAL$")>;
+def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+                                              "^GLD(FF)?1D_IMM$")>;
 
 // Gather load, 32-bit scaled offset
 def : InstRW<[V2Write_10cyc_1L_8V],
-             (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED_REAL$",
-                        "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+             (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED$",
+                        "^GLD(FF)?1W_[SU]XTW_SCALED")>;
 
 // Gather load, 64-bit scaled offset
 // NOTE: These instructions are not specified in the SOG.
 def : InstRW<[V2Write_10cyc_1L_4V],
-             (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED_REAL$",
-                        "^GLD(FF)?1D_([SU]XTW_)?SCALED_REAL$")>;
+             (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED$",
+                        "^GLD(FF)?1D_([SU]XTW_)?SCALED$")>;
 
 // Gather load, 32-bit unpacked unscaled offset
-def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
-                                              "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+                                              "^GLD(FF)?1W_[SU]XTW$")>;
 
 // Gather load, 64-bit unpacked unscaled offset
 // NOTE: These instructions are not specified in the SOG.
 def : InstRW<[V2Write_9cyc_1L_2V],
-             (instregex "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?REAL$",
-                        "^GLD(FF)?1D_([SU]XTW_)?REAL$")>;
+             (instregex "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?$",
+                        "^GLD(FF)?1D(_[SU]XTW)?$")>;
 
 // SVE Store instructions
 // -----------------------------------------------------------------------------
@@ -2790,10 +2790,10 @@ def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D$",
 // -----------------------------------------------------------------------------
 
 // Read first fault register, unpredicated
-def : InstRW<[V2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
+def : InstRW<[V2Write_2cyc_1M0], (instrs RDFFR_P)>;
 
 // Read first fault register, predicated
-def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
+def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs RDFFR_PPz)>;
 
 // Read first fault register and set flags
 def : InstRW<[V2Write_4or5cyc_2M0_2M], (instrs RDFFRS_PPz)>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 789ec817d3d8b..58ba1ba0c190d 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -6412,33 +6412,33 @@ class sve2_mem_sstnt_vs_base<bits<3> opc, string asm,
 multiclass sve2_mem_sstnt_vs_32_ptrs<bits<3> opc, string asm,
                                      SDPatternOperator op,
                                      ValueType vt> {
-  def _REAL : sve2_mem_sstnt_vs_base<opc, (ins Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), asm, Z_s>;
+  def NAME : sve2_mem_sstnt_vs_base<opc, (ins Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), asm, Z_s>;
 
   def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
-                 (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
+                 (!cast<Instruction>(NAME) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
-                 (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
+                 (!cast<Instruction>(NAME) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
-                 (!cast<Instruction>(NAME # _REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;
+                 (!cast<Instruction>(NAME) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;
 
   def : Pat <(op (nxv4i32 ZPR32:$Zt), (nxv4i1 PPR3bAny:$Pg), (nxv4i32 ZPR32:$Zn), (i64 GPR64:$Rm), vt),
-             (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm)>;
+             (!cast<Instruction>(NAME) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm)>;
 }
 
 multiclass sve2_mem_sstnt_vs_64_ptrs<bits<3> opc, string asm,
                                      SDPatternOperator op,
                                      ValueType vt> {
-  def _REAL : sve2_mem_sstnt_vs_base<opc, (ins Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), asm, Z_d>;
+  def NAME : sve2_mem_sstnt_vs_base<opc, (ins Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), asm, Z_d>;
 
   def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
-                 (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
+                 (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
-                 (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
+                 (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
-                 (!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
+                 (!cast<Instruction>(NAME) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
 
   def : Pat <(op (nxv2i64 ZPR64:$Zt), (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64:$Rm), vt),
-             (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+             (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
 }
 
 class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
@@ -6773,11 +6773,11 @@ multiclass sve_int_perm_punpk<bit opc, string asm, SDPatternOperator op> {
   def : SVE_1_Op_Pat<nxv8i16, op, nxv16i1, !cast<Instruction>(NAME)>;
 }
 
-class sve_int_rdffr_pred<bit s, string asm>
+class sve_int_rdffr_pred<bit s, string asm, SDPatternOperator op = null_frag>
 : I<(outs PPR8:$Pd), (ins PPRAny:$Pg),
   asm, "\t$Pd, $Pg/z",
   "",
-  []>, Sched<[]> {
+  [(set (nxv16i1 PPR8:$Pd), (op (nxv16i1 PPRAny:$Pg)))]>, Sched<[]> {
   bits<4> Pd;
   bits<4> Pg;
   let Inst{31-23} = 0b001001010;
@@ -6792,22 +6792,11 @@ class sve_int_rdffr_pred<bit s, string asm>
   let hasSideEffects = 1;
 }
 
-multiclass sve_int_rdffr_pred<bit s, string asm, SDPatternOperator op> {
-  def _REAL : sve_int_rdffr_pred<s, asm>;
-
-  // We need a layer of indirection because early machine code passes balk at
-  // physical register (i.e. FFR) uses that have no previous definition.
-  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
-  def "" : Pseudo<(outs PPR8:$Pd), (ins PPRAny:$Pg), [(set (nxv16i1 PPR8:$Pd), (op (nxv16i1 PPRAny:$Pg)))]>,
-           PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) PPR8:$Pd, PPRAny:$Pg)>;
-  }
-}
-
-class sve_int_rdffr_unpred<string asm> : I<
+class sve_int_rdffr_unpred<string asm, SDPatternOperator op> : I<
   (outs PPR8:$Pd), (ins),
   asm, "\t$Pd",
   "",
-  []>, Sched<[]> {
+  [(set (nxv16i1 PPR8:$Pd), (op))]>, Sched<[]> {
   bits<4> Pd;
   let Inst{31-4} = 0b0010010100011001111100000000;
   let Inst{3-0} = Pd;
@@ -6816,17 +6805,6 @@ class sve_int_rdffr_unpred<string asm> : I<
   let hasSideEffects = 1;
 }
 
-multiclass sve_int_rdffr_unpred<string asm, SDPatternOperator op> {
-  def _REAL : sve_int_rdffr_unpred<asm>;
-
-  // We need a layer of indirection because early machine code passes balk at
-  // physical register (i.e. FFR) uses that have no previous definition.
-  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
-  def "" : Pseudo<(outs PPR8:$Pd), (ins), [(set (nxv16i1 PPR8:$Pd), (op))]>,
-           PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) PPR8:$Pd)>;
-  }
-}
-
 class sve_int_wrffr<string asm, SDPatternOperator op>
 : I<(outs), (ins PPR8:$Pn),
   asm, "\t$Pn",
@@ -7318,9 +7296,9 @@ class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
   let mayLoad = 1;
 }
 
-multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty,
-                          ZPRRegOp zprty> {
-  def "" : sve_mem_cld_si_base<dtype, 0, asm, listty>;
+multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
+                               RegisterOperand listty, ZPRRegOp zprty> {
+  def NAME : sve_mem_cld_si_base<dtype, nf, asm, listty>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
                   (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
@@ -7330,6 +7308,14 @@ multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty,
                   (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
 }
 
+multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty,
+                          ZPRRegOp zprty>
+: sve_mem_cld_si_base<dtype, 0b0, asm, listty, zprty>;
+
+multiclass sve_mem_cldnf_si<bits<4> dtype, string asm, RegisterOperand listty,
+                            ZPRRegOp zprty>
+: sve_mem_cld_si_base<dtype, 0b1, asm, listty, zprty>;
+
 class sve_mem_cldnt_si_base<bits<2> msz, string asm, RegisterOperand VecList>
 : I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4),
   asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
@@ -7518,7 +7504,7 @@ class sve_mem_cld_ss_base<bits<4> dtype, bit ff, dag iops, string asm,
 
 multiclass sve_mem_cld_ss<bits<4> dtype, string asm, RegisterOperand listty,
                           ZPRRegOp zprty, RegisterOperand gprty> {
-  def "" : sve_mem_cld_ss_base<dtype, 0, (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+  def NAME : sve_mem_cld_ss_base<dtype, 0, (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
                                asm, listty>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
@@ -7530,40 +7516,14 @@ multiclass sve_mem_cld_ss<bits<4> dtype, string asm, RegisterOperand listty,
 multiclass sve_mem_cldff_ss<bits<4> dtype, string asm, RegisterOperand listty,
                             ZPRRegOp zprty, RegisterOperand gprty> {
-  def _REAL : sve_mem_cld_ss_base<dtype, 1, (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
-                                  asm, listty>;
+  def NAME : sve_mem_cld_ss_base<dtype, 1, (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+                                 asm, listty>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
-                  (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
-
-  def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
-                  (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
+                  (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
-                  (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
-
-  // We need a layer of indirection because early machine code passes balk at
-  // physical register (i.e. FFR) uses that have no previous definition.
-  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
-  def "" : Pseudo<(outs listty:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), []>,
-           PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm)>;
-  }
-}
-
-multiclass sve_mem_cldnf_si<bits<4> dtype, string asm, RegisterOperand listty,
-                            ZPRRegOp zprty> {
-  def _REAL : sve_mem_cld_si_base<dtype, 1, asm, listty>;
+                  (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
-                  (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
-  def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
-                  (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
-  def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
-                  (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
-
-  // We need a layer of indirection because early machine code passes balk at
-  // physical register (i.e. FFR) uses that have no previous definition.
-  let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in {
-  def "" : Pseudo<(outs listty:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), []>,
-           PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4)>;
-  }
+                  (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
 }
 
 class sve_mem_eld_si<bits<2> sz, bits<3> nregs, RegisterOperand VecList,
@@ -7664,22 +7623,13 @@ multiclass sve_mem_32b_gld_sv_32_scaled<bits<4> opc, string asm,
                                         RegisterOperand sxtw_opnd,
                                         RegisterOperand uxtw_opnd,
                                         ValueType vt> {
-  def _UXTW_SCALED_REAL : sve_mem_32b_gld_sv<opc, 0b0, 0b1, asm, uxtw_opnd>;
-  def _SXTW_SCALED_REAL : sve_mem_32b_gld_sv<opc, 0b1, 0b1, asm, sxtw_opnd>;
+  def _UXTW_SCALED : sve_mem_32b_gld_sv<opc, 0b0, 0b1, asm, uxtw_opnd>;
+  def _SXTW_SCALED : sve_mem_32b_gld_sv<opc, 0b1, 0b1, asm, sxtw_opnd>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
-                  (!cast<Instruction>(NAME # _UXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+                  (!cast<Instruction>(NAME # _UXTW_SCALED) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
-                  (!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
-
-  // We need a layer of indirection because early machine code passes balk at
-  // physical register (i.e. FFR) uses that have no previous definition.
-  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
-  def _UXTW_SCALED : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
-                     PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_SCALED_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
-  def _SXTW_SCALED : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
-                     PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
-  }
+                  (!cast<Instruction>(NAME # _SXTW_SCALED) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
 
   def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), vt)),
             (!cast<Instruction>(NAME # _UXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
@@ -7693,22 +7643,13 @@ multiclass sve_mem_32b_gld_vs_32_unscaled<bits<4> opc, string asm,
                                           RegisterOperand sxtw_opnd,
                                           RegisterOperand uxtw_opnd,
                                           ValueType vt> {
-  def _UXTW_REAL : sve_mem_32b_gld_sv<opc, 0b0, 0b0, asm, uxtw_opnd>;
-  def _SXTW_REAL : sve_mem_32b_gld_sv<opc, 0b1, 0b0, asm, sxtw_opnd>;
+  def _UXTW : sve_mem_32b_gld_sv<opc, 0b0, 0b0, asm, uxtw_opnd>;
+  def _SXTW : sve_mem_32b_gld_sv<opc, 0b1, 0b0, asm, sxtw_opnd>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
-                  (!cast<Instruction>(NAME # _UXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+                  (!cast<Instruction>(NAME # _UXTW) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
-                  (!cast<Instruction>(NAME # _SXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
-
-  // We need a layer of indirection because early machine code passes balk at
-  // physical register (i.e. FFR) uses that have no previous definition.
-  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
-  def _UXTW : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
-              PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
-  def _SXTW : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
-              PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
-  }
+                  (!cast<Instruction>(NAME # _SXTW) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
 
   def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt)),
             (!cast<Instruction>(NAME # _UXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
@@ -7745,21 +7686,14 @@ class sve_mem_32b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
 multiclass sve_mem_32b_gld_vi_32_ptrs<bits<4> opc, string asm, Operand imm_ty,
                                       SDPatternOperator op, ValueType vt> {
-  def _IMM_REAL : sve_mem_32b_gld_vi<opc, asm, imm_ty>;
+  def _IMM : sve_mem_32b_gld_vi<opc, asm, imm_ty>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
-                  (!cast<Instruction>(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 0>;
+                  (!cast<Instruction>(NAME # _IMM) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $imm5]",
-                  (!cast<Instruction>(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>;
+                  (!cast<Instruction>(NAME # _IMM) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
-                  (!cast<Instruction>(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
-
-  // We need a layer of indirection because early machine code passes balk at
-  // physical register (i.e. FFR) uses that have no previous definition.
-  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
-  def _IMM : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), []>,
-             PseudoInstExpansion<(!cast<Instruction>(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5)>;
-  }
+                  (!cast<Instruction>(NAME # _IMM) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
 
   def : Pat<(nxv4i32 (op (nxv4i1 PPR:$gp), (nxv4i32 ZPR:$ptrs), imm_ty:$index, vt)),
             (!cast<Instruction>(NAME # _IMM) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
@@ -7970,35 +7904,33 @@ class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm,
 multiclass sve2_mem_gldnt_vs_32_ptrs<bits<5> opc, string asm,
                                      SDPatternOperator op,
                                      ValueType vt> {
-  def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), asm, Z_s>;
+  def NAME : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), asm, Z_s>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
-                 (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
+                 (!cast<Instruction>(NAME) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
-                 (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
+                 (!cast<Instruction>(NAME) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
-                 (!cast<Instruction>(NAME # _REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;
+                 (!cast<Instruction>(NAME) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;
 
   def : Pat <(nxv4i32 (op (nxv4i1 PPR3bAny:$Pg), (nxv4i32 ZPR32:$Zd), (i64 GPR64:$Rm), vt)),
-             (!cast<Instruction>(NAME # _REAL) PPR3bAny:$Pg, ZPR32:$Zd, GPR64:$Rm)>;
+             (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR32:$Zd, GPR64:$Rm)>;
 }
 
 multiclass sve2_mem_gldnt_vs_64_ptrs<bits<5> opc, string asm,
                                      SDPatternOperator op,
                                      ValueType vt> {
-  def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), asm, Z_d>;
+  def NAME : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), asm, Z_d>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
-                 (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
+                 (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
-                 (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
+                 (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
-                 (!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
+                 (!cast<Instruction>(NAME) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
 
   def : Pat <(nxv2i64 (op (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zd), (i64 GPR64:$Rm), vt)),
-             (!cast<Instruction>(NAME # _REAL) PPR3bAny:$Pg, ZPR64:$Zd, GPR64:$Rm)>;
+             (!cast<Instruction>(NAME)
 PPR3bAny:$Pg, ZPR64:$Zd, GPR64:$Rm)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -8042,22 +7974,13 @@ multiclass sve_mem_64b_gld_sv_32_scaled<bits<4> opc, string asm,
                                         RegisterOperand sxtw_opnd,
                                         RegisterOperand uxtw_opnd,
                                         ValueType vt> {
-  def _UXTW_SCALED_REAL : sve_mem_64b_gld_sv<opc, 0b0, 0b1, asm, uxtw_opnd>;
-  def _SXTW_SCALED_REAL : sve_mem_64b_gld_sv<opc, 0b1, 0b1, asm, sxtw_opnd>;
+  def _UXTW_SCALED : sve_mem_64b_gld_sv<opc, 0b0, 0b1, asm, uxtw_opnd>;
+  def _SXTW_SCALED : sve_mem_64b_gld_sv<opc, 0b1, 0b1, asm, sxtw_opnd>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
-                  (!cast<Instruction>(NAME # _UXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+                  (!cast<Instruction>(NAME # _UXTW_SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
-                  (!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
-
-  // We need a layer of indirection because early machine code passes balk at
-  // physical register (i.e. FFR) uses that have no previous definition.
-  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
-  def _UXTW_SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
-                     PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
-  def _SXTW_SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
-                     PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
-  }
+                  (!cast<Instruction>(NAME # _SXTW_SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
 
   def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
             (!cast<Instruction>(NAME # _UXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
@@ -8071,22 +7994,13 @@ multiclass sve_mem_64b_gld_vs_32_unscaled<bits<4> opc, string asm,
                                           RegisterOperand sxtw_opnd,
                                           RegisterOperand uxtw_opnd,
                                           ValueType vt> {
-  def _UXTW_REAL : sve_mem_64b_gld_sv<opc, 0b0, 0b0, asm, uxtw_opnd>;
-  def _SXTW_REAL : sve_mem_64b_gld_sv<opc, 0b1, 0b0, asm, sxtw_opnd>;
+  def _UXTW : sve_mem_64b_gld_sv<opc, 0b0, 0b0, asm, uxtw_opnd>;
+  def _SXTW : sve_mem_64b_gld_sv<opc, 0b1, 0b0, asm, sxtw_opnd>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
-                  (!cast<Instruction>(NAME # _UXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+                  (!cast<Instruction>(NAME # _UXTW) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
-                  (!cast<Instruction>(NAME # _SXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
-
-  // We need a layer of indirection because early machine code passes balk at
-  // physical register (i.e. FFR) uses that have no previous definition.
-  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
-  def _UXTW : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
-              PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
-  def _SXTW : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
-              PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
-  }
+                  (!cast<Instruction>(NAME # _SXTW) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
 
   def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
             (!cast<Instruction>(NAME # _UXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
@@ -8097,17 +8011,10 @@ multiclass sve_mem_64b_gld_sv2_64_scaled<bits<4> opc, string asm,
                                          SDPatternOperator op,
                                          RegisterOperand zprext, ValueType vt> {
-  def _SCALED_REAL : sve_mem_64b_gld_sv<opc, 0b1, 0b1, asm, zprext>;
+  def _SCALED : sve_mem_64b_gld_sv<opc, 0b1, 0b1, asm, zprext>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
-                  (!cast<Instruction>(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
-
-  // We need a layer of indirection because early machine code passes balk at
-  // physical register (i.e. FFR) uses that have no previous definition.
-  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
-  def _SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), []>,
-                PseudoInstExpansion<(!cast<Instruction>(NAME # _SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm)>;
-  }
+                  (!cast<Instruction>(NAME # _SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
 
   def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
             (!cast<Instruction>(NAME # _SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
@@ -8115,17 +8022,10 @@ multiclass sve_mem_64b_gld_vs2_64_unscaled<bits<4> opc, string asm,
                                            SDPatternOperator op, ValueType vt> {
-  def _REAL : sve_mem_64b_gld_sv<opc, 0b1, 0b0, asm, ZPR64ExtLSL8>;
+  def NAME : sve_mem_64b_gld_sv<opc, 0b1, 0b0, asm, ZPR64ExtLSL8>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
-                  (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
-
-  // We need a layer of indirection because early machine code passes balk at
-  // physical register (i.e. FFR) uses that have no previous definition.
-  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
-  def "" : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), []>,
-           PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm)>;
-  }
+                  (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
 
   def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
             (!cast<Instruction>(NAME) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
@@ -8158,21 +8058,14 @@ class sve_mem_64b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
 multiclass sve_mem_64b_gld_vi_64_ptrs<bits<4> opc, string asm, Operand imm_ty,
                                       SDPatternOperator op, ValueType vt> {
-  def _IMM_REAL : sve_mem_64b_gld_vi<opc, asm, imm_ty>;
+  def _IMM : sve_mem_64b_gld_vi<opc, asm, imm_ty>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
-                  (!cast<Instruction>(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 0>;
+                  (!cast<Instruction>(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $imm5]",
-                  (!cast<Instruction>(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>;
+                  (!cast<Instruction>(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
-                  (!cast<Instruction>(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
-
-  // We need a layer of indirection because early machine code passes balk at
-  // physical register (i.e. FFR) uses that have no previous definition.
-  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
-  def _IMM : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), []>,
-             PseudoInstExpansion<(!cast<Instruction>(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5)>;
-  }
+                  (!cast<Instruction>(NAME # _IMM) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
 
   def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt)),
             (!cast<Instruction>(NAME # _IMM) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll
index 29ad550c40d91..e23151475014d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll
@@ -49,8 +49,8 @@ define void @masked_gather_v8i8(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
 ; VBITS_GE_256-NEXT:    ld1b { z1.d }, p0/z, [z1.d]
 ; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
-; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
 ; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    uzp1 v0.8b, v1.8b, v0.8b
 ; VBITS_GE_256-NEXT:    str d0, [x0]
@@ -153,8 +153,8 @@ define void @masked_gather_v8i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
 ; VBITS_GE_256-NEXT:    ld1h { z1.d }, p0/z, [z1.d]
 ; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
-; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
 ; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    mov v1.d[1], v0.d[0]
 ; VBITS_GE_256-NEXT:    str q1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
index 4ac0abcb851d4..92fce4584f6a9 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
@@ -66,7 +66,6 @@ define void @masked_gather_v8i8(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-NEXT:    zip2 v1.8b, v0.8b, v0.8b
 ; VBITS_GE_256-NEXT:    zip1 v0.8b, v0.8b, v0.8b
 ; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    shl v1.4h, v1.4h, #8
 ; VBITS_GE_256-NEXT:    shl v0.4h, v0.4h, #8
 ; VBITS_GE_256-NEXT:    sshr v1.4h, v1.4h, #8
@@ -76,14 +75,15 @@
 ; VBITS_GE_256-NEXT:    sunpklo z1.d, z1.s
 ; VBITS_GE_256-NEXT:    sunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT:    cmpne p1.d, p0/z, z1.d, #0
+; VBITS_GE_256-NEXT:    ld1b { z1.d }, p1/z, [z2.d]
+; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    cmpne p0.d, p0/z, z0.d, #0
-; VBITS_GE_256-NEXT:    ld1b { z0.d }, p1/z, [z2.d]
-; VBITS_GE_256-NEXT:    ld1b { z1.d }, p0/z, [z3.d]
-; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT:    ld1b { z0.d }, p0/z, [z2.d]
 ; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
-; VBITS_GE_256-NEXT:    uzp1 v0.8b, v1.8b, v0.8b
+; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
 ; VBITS_GE_256-NEXT:    str d0, [x0]
 ; VBITS_GE_256-NEXT:    ret
 ;
@@ -217,23 +217,23 @@ define void @masked_gather_v8i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    cmeq v0.8h, v0.8h, #0
-; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; VBITS_GE_256-NEXT:    sunpklo z1.s, z0.h
+; VBITS_GE_256-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT:    sunpklo z1.s, z1.h
-; VBITS_GE_256-NEXT:    sunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT:    sunpklo z1.d, z1.s
-; VBITS_GE_256-NEXT:    cmpne p1.d, p0/z, z0.d, #0
-; VBITS_GE_256-NEXT:    ld1h { z0.d }, p1/z, [z3.d]
-; VBITS_GE_256-NEXT:    cmpne p0.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
-; VBITS_GE_256-NEXT:    ld1h { z1.d }, p0/z, [z2.d]
+; VBITS_GE_256-NEXT:    sunpklo z0.d, z0.s
+; VBITS_GE_256-NEXT:    cmpne p1.d, p0/z, z1.d, #0
+; VBITS_GE_256-NEXT:    ld1h { z1.d }, p1/z, [z2.d]
+; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT:    ld1h { z0.d }, p0/z, [z2.d]
 ; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
-; VBITS_GE_256-NEXT:    mov v0.d[1], v1.d[0]
-; VBITS_GE_256-NEXT:    str q0, [x0]
+; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT:    mov v1.d[1], v0.d[0]
+; VBITS_GE_256-NEXT:    str q1, [x0]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: masked_gather_v8i16:
@@ -353,22 +353,22 @@ define void @masked_gather_v8i32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    ptrue p2.d, vl4
 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0]
-; VBITS_GE_256-NEXT:    ld1d { z1.d }, p2/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT:    ld1d { z2.d }, p2/z, [x1]
+; VBITS_GE_256-NEXT:    ld1d { z1.d }, p2/z, [x1]
+; VBITS_GE_256-NEXT:    ld1d { z2.d }, p2/z, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    cmpeq p1.s, p0/z, z0.s, #0
+; VBITS_GE_256-NEXT:    punpklo p3.h, p1.b
 ; VBITS_GE_256-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT:    punpklo p1.h, p1.b
 ; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
+; VBITS_GE_256-NEXT:    and p1.b, p3/z, p3.b, p2.b
 ; VBITS_GE_256-NEXT:    sunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT:    and p1.b, p1/z, p1.b, p2.b
-; VBITS_GE_256-NEXT:    cmpne p2.d, p2/z, z0.d, #0
+; VBITS_GE_256-NEXT:    ld1w { z1.d }, p1/z, [z1.d]
+; VBITS_GE_256-NEXT:    cmpne p1.d, p2/z, z0.d, #0
 ; VBITS_GE_256-NEXT:    ld1w { z0.d }, p1/z, [z2.d]
 ; VBITS_GE_256-NEXT:    ptrue p1.s, vl4
-; VBITS_GE_256-NEXT:    ld1w { z1.d }, p2/z, [z1.d]
-; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT:    splice z0.s, p1, z0.s, z1.s
-; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0]
+; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT:    splice z1.s, p1, z1.s, z0.s
+; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: masked_gather_v8i32:
@@ -500,15 +500,15 @@ define void @masked_gather_v8i64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
-; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
-; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
-; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT:    cmpeq p2.d, p0/z, z1.d, #0
+; VBITS_GE_256-NEXT:    ld1d { z0.d }, p1/z, [z1.d]
+; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
+; VBITS_GE_256-NEXT:    cmpeq p1.d, p0/z, z1.d, #0
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p1/z, [z2.d]
-; VBITS_GE_256-NEXT:    ld1d { z0.d }, p2/z, [z0.d]
-; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0, x8, lsl #3]
-; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0]
+; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
+; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: masked_gather_v8i64:
@@ -626,23 +626,23 @@ define void @masked_gather_v8f16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    fcmeq v0.8h, v0.8h, #0.0
-; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; VBITS_GE_256-NEXT:    sunpklo z2.s, z0.h
+; VBITS_GE_256-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT:    sunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT:    sunpklo z2.d, z2.s
 ; VBITS_GE_256-NEXT:    sunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT:    sunpklo z1.d, z1.s
-; VBITS_GE_256-NEXT:    cmpne p1.d, p0/z, z0.d, #0
-; VBITS_GE_256-NEXT:    ld1h { z0.d }, p1/z, [z3.d]
-; VBITS_GE_256-NEXT:    cmpne p0.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
-; VBITS_GE_256-NEXT:    ld1h { z1.d }, p0/z, [z2.d]
+; VBITS_GE_256-NEXT:    cmpne p1.d, p0/z, z2.d, #0
+; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT:    ld1h { z1.d }, p1/z, [z1.d]
+; VBITS_GE_256-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT:    ld1h { z0.d }, p0/z, [z2.d]
 ; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
-; VBITS_GE_256-NEXT:    mov v0.d[1], v1.d[0]
-; VBITS_GE_256-NEXT:    str q0, [x0]
+; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT:    mov v1.d[1], v0.d[0]
+; VBITS_GE_256-NEXT:    str q1, [x0]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: masked_gather_v8f16:
@@ -762,22 +762,22 @@ define void @masked_gather_v8f32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    ptrue p2.d, vl4
 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0]
-; VBITS_GE_256-NEXT:    ld1d { z1.d }, p2/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT:    ld1d { z2.d }, p2/z, [x1]
+; VBITS_GE_256-NEXT:    ld1d { z1.d }, p2/z, [x1]
+; VBITS_GE_256-NEXT:    ld1d { z2.d }, p2/z, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    fcmeq p1.s, p0/z, z0.s, #0.0
+; VBITS_GE_256-NEXT:    punpklo p3.h, p1.b
 ; VBITS_GE_256-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT:    punpklo p1.h, p1.b
 ; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
+; VBITS_GE_256-NEXT:    and p1.b, p3/z, p3.b, p2.b
 ; VBITS_GE_256-NEXT:    sunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT:    and p1.b, p1/z, p1.b, p2.b
-; VBITS_GE_256-NEXT:    cmpne p2.d, p2/z, z0.d, #0
+; VBITS_GE_256-NEXT:    ld1w { z1.d }, p1/z, [z1.d]
+; VBITS_GE_256-NEXT:    cmpne p1.d, p2/z, z0.d, #0
 ; VBITS_GE_256-NEXT:    ld1w { z0.d }, p1/z, [z2.d]
 ; VBITS_GE_256-NEXT:    ptrue p1.s, vl4
-; VBITS_GE_256-NEXT:    ld1w { z1.d }, p2/z, [z1.d]
-; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT:    splice z0.s, p1, z0.s, z1.s
-; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0]
+; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT:    splice z1.s, p1, z1.s, z0.s
+; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: masked_gather_v8f32:
@@ -911,11 +911,11 @@ define void @masked_gather_v8f64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fcmeq p1.d, p0/z, z0.d, #0.0
-; VBITS_GE_256-NEXT:    fcmeq p2.d, p0/z, z1.d, #0.0
 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p1/z, [z0.d]
-; VBITS_GE_256-NEXT:    ld1d { z1.d }, p2/z, [z1.d]
+; VBITS_GE_256-NEXT:    fcmeq p1.d, p0/z, z1.d, #0.0
+; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1]
+; VBITS_GE_256-NEXT:    ld1d { z1.d }, p1/z, [z1.d]
 ; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
 ; VBITS_GE_256-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
index bdaea0ecf144a..5ff9f0f0df62f 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
@@ -19,8 +19,8 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x float> %i37, <
 ; CHECK-NEXT:    smax z4.d, z4.d, #0
 ; CHECK-NEXT:    smin z4.d, p0/m, z4.d, z5.d
 ; CHECK-NEXT:    cmpne p1.d, p0/z, z4.d, #0
-; CHECK-NEXT:    ld1w { z4.d }, p1/z, [x1]
 ; CHECK-NEXT:    ld1w { z0.d }, p1/z, [z0.d]
+; CHECK-NEXT:    ld1w { z4.d }, p1/z, [x1]
 ; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z4.s
 ; CHECK-NEXT:    fcmge p2.s, p0/z, z0.s, z3.s
 ; CHECK-NEXT:    add z0.d, z2.d, z1.d
diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll
index a40d550852798..4d2bc4bde13f5 100644
--- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll
@@ -31,9 +31,9 @@ define <vscale x 2 x i64> @no_dag_combine_sext(<vscale x 2 x i1> %pg,
 ; CHECK-LABEL: no_dag_combine_sext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ld1b { z1.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT:    ptrue p2.d
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    movprfx z0, z1
-; CHECK-NEXT:    sxtb z0.d, p2/m, z1.d
+; CHECK-NEXT:    sxtb z0.d, p0/m, z1.d
 ; CHECK-NEXT:    st1b { z1.d }, p1, [x0]
 ; CHECK-NEXT:    ret
                                                <vscale x 2 x i64> %base,
@@ -79,15 +79,15 @@ define @narrow_i64_gather_index_i8_zext(ptr %out, ptr %in,
@@ ... @@ define @narrow_i64_gather_index_i8_sext(ptr %out, ptr %in,
@@ ... @@ define @masked_gather_nxv8f16( %ptrs,