
[LLVM][AArch64][CodeGen] Mark FFR as a reserved register. #83437


Merged (1 commit, Mar 5, 2024)
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -437,6 +437,10 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
markSuperRegs(Reserved, AArch64::W16);

+ // FFR is modelled as global state that cannot be allocated.
+ if (MF.getSubtarget<AArch64Subtarget>().hasSVE())
+ Reserved.set(AArch64::FFR);
+
// SME tiles are not allocatable.
if (MF.getSubtarget<AArch64Subtarget>().hasSME()) {
for (MCPhysReg SubReg : subregs_inclusive(AArch64::ZA))
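
The change itself is small: FFR joins the strictly reserved set whenever SVE is available, so the register allocator can never hand it out and it stays tied to the instructions that implicitly read or write it (first-faulting/non-faulting loads, rdffr, setffr, wrffr). As a rough sketch only, not part of this patch, backend code elsewhere could observe the effect through the standard reserved-register query; the helper function and includes below are illustrative assumptions:

```cpp
// Sketch: checking that FFR lands in the reserved-register set.
// On AArch64, getReservedRegs() incorporates the registers returned by
// getStrictlyReservedRegs(), which this patch extends with AArch64::FFR.
#include "MCTargetDesc/AArch64MCTargetDesc.h" // AArch64::FFR register enum
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"

static bool isFFRReserved(const llvm::MachineFunction &MF) {
  const llvm::TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  // Reserved registers are never assigned by the register allocator.
  return TRI->getReservedRegs(MF).test(llvm::AArch64::FFR);
}
```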
10 changes: 5 additions & 5 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -488,11 +488,11 @@ def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AAr
def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>;

let Predicates = [HasSVE] in {
- defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
- def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
- defm RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
- def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
- def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
+ def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
+ def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
+ def RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
+ def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
+ def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
} // End HasSVE

let Predicates = [HasSVEorSME] in {
52 changes: 26 additions & 26 deletions llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -1198,24 +1198,24 @@ def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instregex "^LDNT1[BHWD]_
def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instregex "^LDNT1[BHWD]_ZRR$")>;

// Non temporal gather load, vector + scalar 32-bit element size
- def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
- "^LDNT1S[BH]_ZZR_S_REAL$")>;
+ def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^LDNT1[BHW]_ZZR_S$",
+ "^LDNT1S[BH]_ZZR_S$")>;

// Non temporal gather load, vector + scalar 64-bit element size
- def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
- def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instrs LDNT1D_ZZR_D_REAL)>;
+ def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
+ def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instrs LDNT1D_ZZR_D)>;

// Contiguous first faulting load, scalar + scalar
- def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDFF1[BHWD]_REAL$",
- "^LDFF1S?B_[HSD]_REAL$",
- "^LDFF1S?H_[SD]_REAL$",
- "^LDFF1S?W_D_REAL$")>;
+ def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDFF1[BHWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?H_[SD]$",
+ "^LDFF1S?W_D$")>;

// Contiguous non faulting load, scalar + imm
- def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDNF1[BHWD]_IMM_REAL$",
- "^LDNF1S?B_[HSD]_IMM_REAL$",
- "^LDNF1S?H_[SD]_IMM_REAL$",
- "^LDNF1S?W_D_IMM_REAL$")>;
+ def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;

// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[CortexA510MCWrite<3, 1, CortexA510UnitLdSt>], (instregex "^LD2[BHWD]_IMM$")>;
@@ -1236,28 +1236,28 @@ def : InstRW<[CortexA510MCWrite<5, 3, CortexA510UnitLdSt>], (instregex "^LD4[BHW
def : InstRW<[CortexA510MCWrite<5, 3, CortexA510UnitLdSt>], (instregex "^LD4[BHWD]$")>;

// Gather load, vector + imm, 32-bit element size
- def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
- "^GLD(FF)?1W_IMM_REAL$")>;
+ def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ "^GLD(FF)?1W_IMM$")>;

// Gather load, vector + imm, 64-bit element size
- def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
- "^GLD(FF)?1D_IMM_REAL$")>;
+ def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+ "^GLD(FF)?1D_IMM$")>;

// Gather load, 64-bit element size
def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>],
(instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
"^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
"^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
"^GLD(FF)?1D_(SCALED_)?REAL$")>;
(instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$",
"^GLD(FF)?1S?[BHW]_D(_SCALED)?$",
"^GLD(FF)?1D_[SU]XTW(_SCALED)?$",
"^GLD(FF)?1D(_SCALED)?$")>;

// Gather load, 32-bit scaled offset
def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>],
(instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
"^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
(instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
"^GLD(FF)?1W_[SU]XTW_SCALED")>;

// Gather load, 32-bit unpacked unscaled offset
- def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
- "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+ def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+ "^GLD(FF)?1W_[SU]XTW$")>;

def : InstRW<[CortexA510Write<0, CortexA510UnitVALU>], (instregex "^PRF(B|H|W|D).*")>;
// SVE Store instructions
@@ -1357,10 +1357,10 @@ def : InstRW<[CortexA510VSt<8>], (instregex "^SST1[BHW]_D$",
// -----------------------------------------------------------------------------

// Read first fault register, unpredicated
- def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs RDFFR_P_REAL)>;
+ def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs RDFFR_P)>;

// Read first fault register, predicated
- def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFR_PPz_REAL)>;
+ def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFR_PPz)>;

// Read first fault register and set flags
def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFRS_PPz)>;
52 changes: 26 additions & 26 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -2110,24 +2110,24 @@ def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;
def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;

// Non temporal gather load, vector + scalar 32-bit element size
- def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
- "^LDNT1S[BH]_ZZR_S_REAL$")>;
+ def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S$",
+ "^LDNT1S[BH]_ZZR_S$")>;

// Non temporal gather load, vector + scalar 64-bit element size
- def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
- def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
+ def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
+ def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D)>;

// Contiguous first faulting load, scalar + scalar
- def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
- "^LDFF1S?B_[HSD]_REAL$",
- "^LDFF1S?H_[SD]_REAL$",
- "^LDFF1S?W_D_REAL$")>;
+ def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?H_[SD]$",
+ "^LDFF1S?W_D$")>;

// Contiguous non faulting load, scalar + imm
- def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
- "^LDNF1S?B_[HSD]_IMM_REAL$",
- "^LDNF1S?H_[SD]_IMM_REAL$",
- "^LDNF1S?W_D_IMM_REAL$")>;
+ def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;

// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;
@@ -2148,28 +2148,28 @@ def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;
def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;

// Gather load, vector + imm, 32-bit element size
- def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
- "^GLD(FF)?1W_IMM_REAL$")>;
+ def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ "^GLD(FF)?1W_IMM$")>;

// Gather load, vector + imm, 64-bit element size
- def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
- "^GLD(FF)?1D_IMM_REAL$")>;
+ def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+ "^GLD(FF)?1D_IMM$")>;

// Gather load, 64-bit element size
def : InstRW<[N2Write_9cyc_2L_2V],
(instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
"^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
"^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
"^GLD(FF)?1D_(SCALED_)?REAL$")>;
(instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$",
"^GLD(FF)?1S?[BHW]_D(_SCALED)?$",
"^GLD(FF)?1D_[SU]XTW(_SCALED)?$",
"^GLD(FF)?1D(_SCALED)?$")>;

// Gather load, 32-bit scaled offset
def : InstRW<[N2Write_10cyc_2L_2V],
(instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
"^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
(instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
"^GLD(FF)?1W_[SU]XTW_SCALED")>;

// Gather load, 32-bit unpacked unscaled offset
- def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
- "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+ def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+ "^GLD(FF)?1W_[SU]XTW$")>;

// SVE Store instructions
// -----------------------------------------------------------------------------
@@ -2268,10 +2268,10 @@ def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$",
// -----------------------------------------------------------------------------

// Read first fault register, unpredicated
- def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
+ def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P)>;

// Read first fault register, predicated
- def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
+ def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz)>;

// Read first fault register and set flags
def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;
42 changes: 21 additions & 21 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -1714,17 +1714,17 @@ def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>;
def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>;

// Contiguous first faulting load, scalar + scalar
- def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H_REAL$",
- "^LDFF1S?H_[SD]_REAL$")>;
- def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]_REAL$",
- "^LDFF1S?B_[HSD]_REAL$",
- "^LDFF1S?W_D_REAL$")>;
+ def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H$",
+ "^LDFF1S?H_[SD]$")>;
+ def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?W_D$")>;

// Contiguous non faulting load, scalar + imm
- def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM_REAL$",
- "^LDNF1S?B_[HSD]_IMM_REAL$",
- "^LDNF1S?H_[SD]_IMM_REAL$",
- "^LDNF1S?W_D_IMM_REAL$")>;
+ def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;

// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>;
@@ -1746,25 +1746,25 @@ def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>;
def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>;

// Gather load, vector + imm, 32-bit element size
- def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
- "^GLD(FF)?1W_IMM_REAL$")>;
+ def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ "^GLD(FF)?1W_IMM$")>;

// Gather load, vector + imm, 64-bit element size
def : InstRW<[V1Write_9c_2L_2V],
(instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
"^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?(SCALED_)?REAL$",
"^GLD(FF)?1D_IMM_REAL$",
"^GLD(FF)?1D_([SU]XTW_)?(SCALED_)?REAL$")>;
(instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
"^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?(_SCALED)?$",
"^GLD(FF)?1D_IMM$",
"^GLD(FF)?1D(_[SU]XTW)?(_SCALED)?$")>;

// Gather load, 32-bit scaled offset
def : InstRW<[V1Write_11c_2L_2V],
(instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
"^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
(instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
"^GLD(FF)?1W_[SU]XTW_SCALED")>;

// Gather load, 32-bit unpacked unscaled offset
def : InstRW<[V1Write_9c_1L_1V],
(instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
"^GLD(FF)?1W_[SU]XTW_REAL$")>;
(instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
"^GLD(FF)?1W_[SU]XTW$")>;

// Prefetch
// NOTE: This is not specified in the SOG.
@@ -1848,12 +1848,12 @@ def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
// Read first fault register, unpredicated
// Set first fault register
// Write to first fault register
- def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P_REAL,
+ def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P,
SETFFR,
WRFFR)>;

// Read first fault register, predicated
- def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz_REAL)>;
+ def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz)>;

// Read first fault register and set flags
def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>;