Skip to content

Commit b717365

Browse files
authored
[MachineScheduler][NFCI] Add Offset and OffsetIsScalable args to shouldClusterMemOps (#73778)
These are picked up from getMemOperandsWithOffsetWidth but weren't then being passed through to shouldClusterMemOps, which forces backends to collect the information again if they want to use the kind of heuristics typically used for the similar shouldScheduleLoadsNear function (e.g. checking that the offset is within one cache line). This patch just adds the parameters, but doesn't attempt to use them. There is potential to use them in the current PPC and AArch64 shouldClusterMemOps implementations, and I intend to use the offset in the heuristic for RISC-V. I've left these for future patches in the interest of being as incremental as possible. As noted in the review and in an inline FIXME, an ElementCount-style abstraction may later be used to condense these two parameters to one argument. ElementCount itself isn't quite suitable as it doesn't support negative offsets.
1 parent 10f7801 commit b717365

11 files changed

+42
-12
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1415,6 +1415,8 @@ class TargetInstrInfo : public MCInstrInfo {
14151415
/// Get the base operand and byte offset of an instruction that reads/writes
14161416
/// memory. This is a convenience function for callers that are only prepared
14171417
/// to handle a single base operand.
1418+
/// FIXME: Move Offset and OffsetIsScalable to some ElementCount-style
1419+
/// abstraction that supports negative offsets.
14181420
bool getMemOperandWithOffset(const MachineInstr &MI,
14191421
const MachineOperand *&BaseOp, int64_t &Offset,
14201422
bool &OffsetIsScalable,
@@ -1427,6 +1429,8 @@ class TargetInstrInfo : public MCInstrInfo {
14271429
/// It returns false if base operands and offset could not be determined.
14281430
/// It is not guaranteed to always recognize base operands and offsets in all
14291431
/// cases.
1432+
/// FIXME: Move Offset and OffsetIsScalable to some ElementCount-style
1433+
/// abstraction that supports negative offsets.
14301434
virtual bool getMemOperandsWithOffsetWidth(
14311435
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
14321436
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
@@ -1497,12 +1501,18 @@ class TargetInstrInfo : public MCInstrInfo {
14971501
/// to TargetPassConfig::createMachineScheduler() to have an effect.
14981502
///
14991503
/// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
1504+
/// \p Offset1 and \p Offset2 are the byte offsets for the memory
1505+
/// operations.
1506+
/// \p OffsetIsScalable1 and \p OffsetIsScalable2 indicate if the offset is
1507+
/// scaled by a runtime quantity.
15001508
/// \p ClusterSize is the number of operations in the resulting load/store
15011509
/// cluster if this hook returns true.
15021510
/// \p NumBytes is the number of bytes that will be loaded from all the
15031511
/// clustered loads if this hook returns true.
15041512
virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
1513+
int64_t Offset1, bool OffsetIsScalable1,
15051514
ArrayRef<const MachineOperand *> BaseOps2,
1515+
int64_t Offset2, bool OffsetIsScalable2,
15061516
unsigned ClusterSize,
15071517
unsigned NumBytes) const {
15081518
llvm_unreachable("target did not implement shouldClusterMemOps()");

llvm/lib/CodeGen/MachineScheduler.cpp

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1698,11 +1698,12 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
16981698
SmallVector<const MachineOperand *, 4> BaseOps;
16991699
int64_t Offset;
17001700
unsigned Width;
1701+
bool OffsetIsScalable;
17011702

17021703
MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1703-
int64_t Offset, unsigned Width)
1704+
int64_t Offset, bool OffsetIsScalable, unsigned Width)
17041705
: SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1705-
Width(Width) {}
1706+
Width(Width), OffsetIsScalable(OffsetIsScalable) {}
17061707

17071708
static bool Compare(const MachineOperand *const &A,
17081709
const MachineOperand *const &B) {
@@ -1831,8 +1832,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
18311832
SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
18321833
}
18331834

1834-
if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
1835-
CurrentClusterBytes))
1835+
if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
1836+
MemOpa.OffsetIsScalable, MemOpb.BaseOps,
1837+
MemOpb.Offset, MemOpb.OffsetIsScalable,
1838+
ClusterLength, CurrentClusterBytes))
18361839
continue;
18371840

18381841
SUnit *SUa = MemOpa.SU;
@@ -1899,7 +1902,8 @@ void BaseMemOpClusterMutation::collectMemOpRecords(
18991902
unsigned Width;
19001903
if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
19011904
OffsetIsScalable, Width, TRI)) {
1902-
MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
1905+
MemOpRecords.push_back(
1906+
MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
19031907

19041908
LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
19051909
<< Offset << ", OffsetIsScalable: " << OffsetIsScalable

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4238,8 +4238,9 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
42384238
///
42394239
/// Only called for LdSt for which getMemOperandWithOffset returns true.
42404240
bool AArch64InstrInfo::shouldClusterMemOps(
4241-
ArrayRef<const MachineOperand *> BaseOps1,
4242-
ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
4241+
ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4242+
bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4243+
int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
42434244
unsigned NumBytes) const {
42444245
assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
42454246
const MachineOperand &BaseOp1 = *BaseOps1.front();

llvm/lib/Target/AArch64/AArch64InstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,7 +179,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
179179
int64_t &MinOffset, int64_t &MaxOffset);
180180

181181
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
182+
int64_t Offset1, bool OffsetIsScalable1,
182183
ArrayRef<const MachineOperand *> BaseOps2,
184+
int64_t Offset2, bool OffsetIsScalable2,
183185
unsigned ClusterSize,
184186
unsigned NumBytes) const override;
185187

llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,10 @@ class SIInsertHardClauses : public MachineFunctionPass {
232232
// scheduler it limits the size of the cluster to avoid increasing
233233
// register pressure too much, but this pass runs after register
234234
// allocation so there is no need for that kind of limit.
235-
!SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
235+
// We also lie about the Offset and OffsetIsScalable parameters,
236+
// as they aren't used in the SIInstrInfo implementation.
237+
!SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false,
238+
2, 2)))) {
236239
// Finish the current clause.
237240
Changed |= emitClause(CI, SII);
238241
CI = ClauseInfo();

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -541,7 +541,9 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
541541
}
542542

543543
bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
544+
int64_t Offset1, bool OffsetIsScalable1,
544545
ArrayRef<const MachineOperand *> BaseOps2,
546+
int64_t Offset2, bool OffsetIsScalable2,
545547
unsigned ClusterSize,
546548
unsigned NumBytes) const {
547549
// If the mem ops (to be clustered) do not have the same base ptr, then they

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -234,7 +234,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
234234
const TargetRegisterInfo *TRI) const final;
235235

236236
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
237+
int64_t Offset1, bool OffsetIsScalable1,
237238
ArrayRef<const MachineOperand *> BaseOps2,
239+
int64_t Offset2, bool OffsetIsScalable2,
238240
unsigned ClusterSize,
239241
unsigned NumBytes) const override;
240242

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2877,8 +2877,9 @@ static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
28772877
}
28782878

28792879
bool PPCInstrInfo::shouldClusterMemOps(
2880-
ArrayRef<const MachineOperand *> BaseOps1,
2881-
ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
2880+
ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
2881+
bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2882+
int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
28822883
unsigned NumBytes) const {
28832884

28842885
assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);

llvm/lib/Target/PowerPC/PPCInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -532,7 +532,9 @@ class PPCInstrInfo : public PPCGenInstrInfo {
532532
/// Returns true if the two given memory operations should be scheduled
533533
/// adjacent.
534534
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
535+
int64_t Offset1, bool OffsetIsScalable1,
535536
ArrayRef<const MachineOperand *> BaseOps2,
537+
int64_t Offset2, bool OffsetIsScalable2,
536538
unsigned ClusterSize,
537539
unsigned NumBytes) const override;
538540

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2266,8 +2266,9 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
22662266
}
22672267

22682268
bool RISCVInstrInfo::shouldClusterMemOps(
2269-
ArrayRef<const MachineOperand *> BaseOps1,
2270-
ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
2269+
ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
2270+
bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2271+
int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
22712272
unsigned NumBytes) const {
22722273
// If the mem ops (to be clustered) do not have the same base ptr, then they
22732274
// should not be clustered

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -158,7 +158,9 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
158158
const TargetRegisterInfo *TRI) const override;
159159

160160
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
161+
int64_t Offset1, bool OffsetIsScalable1,
161162
ArrayRef<const MachineOperand *> BaseOps2,
163+
int64_t Offset2, bool OffsetIsScalable2,
162164
unsigned ClusterSize,
163165
unsigned NumBytes) const override;
164166

0 commit comments

Comments
 (0)