Skip to content

Commit ef38045

Browse files
committed
[RISCV] Select atomic_{load/store} to pseudos and expand them later
In the previous implementation, we inserted leading/trailing fences in `AtomicExpandPass` and changed the memory order to `monotonic` before ISel. The atomic loads/stores were then selected to normal load/store instructions. With this patch, we no longer insert fences for atomic load/store before ISel; instead, we select them to pseudos and expand them, just like the other atomic operations, in `RISCVExpandAtomicPseudo`. I do this for two reasons: 1. All atomic operations are expanded in `RISCVExpandAtomicPseudo` except atomic load/store, which is inconsistent and a little confusing when digging into the implementation. 2. For some hardware implementations, `load+fence` and `fence+store` can be fused into optimized macro instructions. This requires that the fence and the load/store be glued together. We could achieve this by defining `DAGMutation`s in `RISCVMacroFusion.cpp`, but I think this expansion approach is more straightforward since we treat atomic loads/stores as whole instructions.
1 parent a93e76d commit ef38045

File tree

5 files changed

+224
-55
lines changed

5 files changed

+224
-55
lines changed

llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ class RISCVExpandAtomicPseudo : public MachineFunctionPass {
4848
bool expandMBB(MachineBasicBlock &MBB);
4949
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
5050
MachineBasicBlock::iterator &NextMBBI);
51+
bool expandAtomicLoadStore(MachineBasicBlock &MBB,
52+
MachineBasicBlock::iterator MBBI,
53+
unsigned int Opcode, bool IsLoad);
5154
bool expandAtomicBinOp(MachineBasicBlock &MBB,
5255
MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
5356
bool IsMasked, int Width,
@@ -111,6 +114,22 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
111114
// expanded instructions for each pseudo is correct in the Size field of the
112115
// tablegen definition for the pseudo.
113116
switch (MBBI->getOpcode()) {
117+
case RISCV::PseudoAtomicLB:
118+
return expandAtomicLoadStore(MBB, MBBI, RISCV::LB, true);
119+
case RISCV::PseudoAtomicLH:
120+
return expandAtomicLoadStore(MBB, MBBI, RISCV::LH, true);
121+
case RISCV::PseudoAtomicLW:
122+
return expandAtomicLoadStore(MBB, MBBI, RISCV::LW, true);
123+
case RISCV::PseudoAtomicLD:
124+
return expandAtomicLoadStore(MBB, MBBI, RISCV::LD, true);
125+
case RISCV::PseudoAtomicSB:
126+
return expandAtomicLoadStore(MBB, MBBI, RISCV::SB, false);
127+
case RISCV::PseudoAtomicSH:
128+
return expandAtomicLoadStore(MBB, MBBI, RISCV::SH, false);
129+
case RISCV::PseudoAtomicSW:
130+
return expandAtomicLoadStore(MBB, MBBI, RISCV::SW, false);
131+
case RISCV::PseudoAtomicSD:
132+
return expandAtomicLoadStore(MBB, MBBI, RISCV::SD, false);
114133
case RISCV::PseudoAtomicLoadNand32:
115134
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
116135
NextMBBI);
@@ -385,6 +404,91 @@ static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
385404
.addMBB(LoopMBB);
386405
}
387406

407+
static void insertFence(const RISCVInstrInfo *TII, MachineBasicBlock &MBB,
408+
MachineBasicBlock::iterator MBBI, DebugLoc DL,
409+
AtomicOrdering Ordering) {
410+
// fence acq_rel -> fence.tso
411+
if (Ordering == AtomicOrdering::AcquireRelease) {
412+
BuildMI(MBB, MBBI, DL, TII->get(RISCV::FENCE_TSO));
413+
} else {
414+
int Pred, Succ;
415+
switch (Ordering) {
416+
default:
417+
llvm_unreachable("Unsupported AtomicOrdering");
418+
case AtomicOrdering::Acquire:
419+
// fence acquire -> fence r, rw
420+
Pred = 0b10;
421+
Succ = 0b11;
422+
break;
423+
case AtomicOrdering::Release:
424+
// fence release -> fence rw, w
425+
Pred = 0b11;
426+
Succ = 0b01;
427+
break;
428+
case AtomicOrdering::SequentiallyConsistent:
429+
// fence seq_cst -> fence rw, rw
430+
Pred = 0b11;
431+
Succ = 0b11;
432+
break;
433+
}
434+
BuildMI(MBB, MBBI, DL, TII->get(RISCV::FENCE)).addImm(Pred).addImm(Succ);
435+
}
436+
}
437+
438+
static void emitLeadingFence(const RISCVSubtarget *Subtarget,
439+
const RISCVInstrInfo *TII, MachineBasicBlock &MBB,
440+
MachineBasicBlock::iterator MBBI, DebugLoc DL,
441+
AtomicOrdering Ordering, bool IsLoad) {
442+
if (Subtarget->hasStdExtZtso()) {
443+
if (IsLoad && Ordering == AtomicOrdering::SequentiallyConsistent)
444+
insertFence(TII, MBB, MBBI, DL, Ordering);
445+
return;
446+
}
447+
448+
if (IsLoad && Ordering == AtomicOrdering::SequentiallyConsistent) {
449+
insertFence(TII, MBB, MBBI, DL, Ordering);
450+
return;
451+
}
452+
453+
if (!IsLoad && isReleaseOrStronger(Ordering))
454+
insertFence(TII, MBB, MBBI, DL, AtomicOrdering::Release);
455+
}
456+
457+
static void emitTrailingFence(const RISCVSubtarget *Subtarget,
458+
const RISCVInstrInfo *TII, MachineBasicBlock &MBB,
459+
MachineBasicBlock::iterator MBBI, DebugLoc DL,
460+
AtomicOrdering Ordering, bool IsLoad) {
461+
if (Subtarget->hasStdExtZtso()) {
462+
if (!IsLoad && Ordering == AtomicOrdering::SequentiallyConsistent)
463+
insertFence(TII, MBB, MBBI, DL, Ordering);
464+
return;
465+
}
466+
467+
if (IsLoad && isAcquireOrStronger(Ordering)) {
468+
insertFence(TII, MBB, MBBI, DL, AtomicOrdering::Acquire);
469+
return;
470+
}
471+
472+
if (Subtarget->enableSeqCstTrailingFence() && !IsLoad &&
473+
Ordering == AtomicOrdering::SequentiallyConsistent)
474+
insertFence(TII, MBB, MBBI, DL, AtomicOrdering::SequentiallyConsistent);
475+
}
476+
477+
bool RISCVExpandAtomicPseudo::expandAtomicLoadStore(
478+
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
479+
unsigned int Opcode, bool IsLoad) {
480+
auto Ordering = static_cast<AtomicOrdering>(MBBI->getOperand(3).getImm());
481+
DebugLoc DL = MBBI->getDebugLoc();
482+
emitLeadingFence(STI, TII, MBB, MBBI, DL, Ordering, IsLoad);
483+
BuildMI(MBB, MBBI, DL, TII->get(Opcode))
484+
.add(MBBI->getOperand(0))
485+
.add(MBBI->getOperand(1))
486+
.add(MBBI->getOperand(2));
487+
emitTrailingFence(STI, TII, MBB, MBBI, DL, Ordering, IsLoad);
488+
MBBI->eraseFromParent();
489+
return true;
490+
}
491+
388492
bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
389493
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
390494
AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
@@ -554,7 +658,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
554658
computeAndAddLiveIns(LiveRegs, *DoneMBB);
555659

556660
return true;
557-
}
661+
}
558662

559663
// If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
560664
// operation, it can be folded into the cmpxchg expansion by

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -17770,39 +17770,6 @@ void RISCVTargetLowering::LowerAsmOperandForConstraint(
1777017770
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1777117771
}
1777217772

17773-
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
17774-
Instruction *Inst,
17775-
AtomicOrdering Ord) const {
17776-
if (Subtarget.hasStdExtZtso()) {
17777-
if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
17778-
return Builder.CreateFence(Ord);
17779-
return nullptr;
17780-
}
17781-
17782-
if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
17783-
return Builder.CreateFence(Ord);
17784-
if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
17785-
return Builder.CreateFence(AtomicOrdering::Release);
17786-
return nullptr;
17787-
}
17788-
17789-
Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
17790-
Instruction *Inst,
17791-
AtomicOrdering Ord) const {
17792-
if (Subtarget.hasStdExtZtso()) {
17793-
if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
17794-
return Builder.CreateFence(Ord);
17795-
return nullptr;
17796-
}
17797-
17798-
if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
17799-
return Builder.CreateFence(AtomicOrdering::Acquire);
17800-
if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
17801-
Ord == AtomicOrdering::SequentiallyConsistent)
17802-
return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
17803-
return nullptr;
17804-
}
17805-
1780617773
TargetLowering::AtomicExpansionKind
1780717774
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1780817775
// atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -613,14 +613,6 @@ class RISCVTargetLowering : public TargetLowering {
613613

614614
bool preferZeroCompareBranch() const override { return true; }
615615

616-
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
617-
return isa<LoadInst>(I) || isa<StoreInst>(I);
618-
}
619-
Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
620-
AtomicOrdering Ord) const override;
621-
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
622-
AtomicOrdering Ord) const override;
623-
624616
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
625617
EVT VT) const override;
626618

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,20 +1270,52 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
12701270
if (MI.isMetaInstruction())
12711271
return 0;
12721272

1273+
const MachineFunction &MF = *MI.getParent()->getParent();
1274+
const auto &ST = MF.getSubtarget<RISCVSubtarget>();
1275+
12731276
unsigned Opcode = MI.getOpcode();
1277+
switch (Opcode) {
1278+
default:
1279+
break;
1280+
case RISCV::PseudoAtomicLB:
1281+
case RISCV::PseudoAtomicLH:
1282+
case RISCV::PseudoAtomicLW:
1283+
case RISCV::PseudoAtomicLD: {
1284+
auto Ordering = static_cast<AtomicOrdering>(MI.getOperand(3).getImm());
1285+
switch (Ordering) {
1286+
default:
1287+
return 4;
1288+
case AtomicOrdering::Acquire:
1289+
return 8;
1290+
case AtomicOrdering::SequentiallyConsistent:
1291+
return ST.hasStdExtZtso() ? 8 : 12;
1292+
}
1293+
}
1294+
case RISCV::PseudoAtomicSB:
1295+
case RISCV::PseudoAtomicSH:
1296+
case RISCV::PseudoAtomicSW:
1297+
case RISCV::PseudoAtomicSD: {
1298+
auto Ordering = static_cast<AtomicOrdering>(MI.getOperand(3).getImm());
1299+
switch (Ordering) {
1300+
default:
1301+
return 4;
1302+
case AtomicOrdering::Release:
1303+
return 8;
1304+
case AtomicOrdering::SequentiallyConsistent:
1305+
return ST.hasStdExtZtso() ? 8 : ST.enableSeqCstTrailingFence() ? 12 : 8;
1306+
}
1307+
}
1308+
}
12741309

12751310
if (Opcode == TargetOpcode::INLINEASM ||
12761311
Opcode == TargetOpcode::INLINEASM_BR) {
1277-
const MachineFunction &MF = *MI.getParent()->getParent();
12781312
const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
12791313
return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
12801314
*TM.getMCAsmInfo());
12811315
}
12821316

12831317
if (!MI.memoperands_empty()) {
12841318
MachineMemOperand *MMO = *(MI.memoperands_begin());
1285-
const MachineFunction &MF = *MI.getParent()->getParent();
1286-
const auto &ST = MF.getSubtarget<RISCVSubtarget>();
12871319
if (ST.hasStdExtZihintntl() && MMO->isNonTemporal()) {
12881320
if (ST.hasStdExtCOrZca() && ST.enableRVCHintInstrs()) {
12891321
if (isCompressibleInst(MI, STI))

llvm/lib/Target/RISCV/RISCVInstrInfoA.td

Lines changed: 84 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -110,21 +110,95 @@ defm AMOCAS_Q : AMO_rr_aq_rl<0b00101, 0b100, "amocas.q">;
110110
//===----------------------------------------------------------------------===//
111111

112112
// Atomic load/store are available under both +a and +force-atomics.
113-
// Fences will be inserted for atomic load/stores according to the logic in
114-
// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
113+
114+
// Pseudo atomic load instructions.
115+
class PseudoAtomicLoad
116+
: Pseudo<(outs GPR:$rd), (ins GPRMem:$rs1, simm12:$imm12, ixlenimm:$ordering), []> {
117+
let hasSideEffects = 1;
118+
let mayLoad = 1;
119+
let mayStore = 0;
120+
}
121+
122+
// Pseudo atomic store instructions.
123+
class PseudoAtomicStore
124+
: Pseudo<(outs), (ins GPR:$rs2, GPRMem:$rs1, simm12:$imm12, ixlenimm:$ordering), []> {
125+
let hasSideEffects = 1;
126+
let mayLoad = 0;
127+
let mayStore = 1;
128+
}
129+
130+
let IsAtomic = 1 in {
131+
class AtomicLoadPatFrag<PatFrags base> : PatFrag<(ops node:$ptr), (base node:$ptr)>;
132+
class AtomicStorePatFrag<PatFrags base> : PatFrag<(ops node:$val, node:$ptr),
133+
(base node:$val, node:$ptr)>;
134+
}
135+
136+
// An atomic load operation that doesn't actually need to be atomic.
137+
let IsAtomicOrderingAcquireOrStronger = 0 in
138+
class relaxed_load<PatFrags base> : AtomicLoadPatFrag<base>;
139+
140+
// An atomic load operation that needs acquire semantics.
141+
let IsAtomicOrderingAcquire = 1 in
142+
class acquire_load<PatFrags base> : AtomicLoadPatFrag<base>;
143+
144+
// An atomic load operation that needs sequential consistency semantics.
145+
let IsAtomicOrderingSequentiallyConsistent = 1 in
146+
class seq_cst_load<PatFrags base> : AtomicLoadPatFrag<base>;
147+
148+
// An atomic store operation that doesn't actually need to be atomic.
149+
let IsAtomicOrderingReleaseOrStronger = 0 in
150+
class relaxed_store<PatFrags base> : AtomicStorePatFrag<base>;
151+
152+
// An atomic store operation that needs release semantics.
153+
let IsAtomicOrderingRelease = 1 in
154+
class release_store<PatFrags base> : AtomicStorePatFrag<base>;
155+
156+
// An atomic store operation that needs sequential consistency semantics.
157+
let IsAtomicOrderingSequentiallyConsistent = 1 in
158+
class seq_cst_store<PatFrags base> : AtomicStorePatFrag<base>;
159+
160+
multiclass AtomicLdPat<PatFrag LoadOp, RVInst Inst> {
161+
def : Pat<(XLenVT (relaxed_load<LoadOp> (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
162+
(Inst GPR:$rs1, simm12:$imm12, 2)>;
163+
def : Pat<(XLenVT (acquire_load<LoadOp> (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
164+
(Inst GPR:$rs1, simm12:$imm12, 4)>;
165+
def : Pat<(XLenVT (seq_cst_load<LoadOp> (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
166+
(Inst GPR:$rs1, simm12:$imm12, 7)>;
167+
}
168+
169+
multiclass AtomicStPat<PatFrag StoreOp, RVInst Inst> {
170+
def : Pat<(relaxed_store<StoreOp> (XLenVT GPR:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)),
171+
(Inst GPR:$rs2, GPR:$rs1, simm12:$imm12, 2)>;
172+
def : Pat<(release_store<StoreOp> (XLenVT GPR:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)),
173+
(Inst GPR:$rs2, GPR:$rs1, simm12:$imm12, 5)>;
174+
def : Pat<(seq_cst_store<StoreOp> (XLenVT GPR:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)),
175+
(Inst GPR:$rs2, GPR:$rs1, simm12:$imm12, 7)>;
176+
}
177+
115178
let Predicates = [HasAtomicLdSt] in {
116-
def : LdPat<atomic_load_8, LB>;
117-
def : LdPat<atomic_load_16, LH>;
118-
def : LdPat<atomic_load_32, LW>;
179+
def PseudoAtomicLB : PseudoAtomicLoad, Sched<[WriteLDB, ReadMemBase]>;
180+
def PseudoAtomicLH : PseudoAtomicLoad, Sched<[WriteLDH, ReadMemBase]>;
181+
def PseudoAtomicLW : PseudoAtomicLoad, Sched<[WriteLDW, ReadMemBase]>;
182+
183+
def PseudoAtomicSB : PseudoAtomicStore, Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
184+
def PseudoAtomicSH : PseudoAtomicStore, Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
185+
def PseudoAtomicSW : PseudoAtomicStore, Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
119186

120-
def : StPat<atomic_store_8, SB, GPR, XLenVT>;
121-
def : StPat<atomic_store_16, SH, GPR, XLenVT>;
122-
def : StPat<atomic_store_32, SW, GPR, XLenVT>;
187+
defm : AtomicLdPat<atomic_load_8, PseudoAtomicLB>;
188+
defm : AtomicLdPat<atomic_load_16, PseudoAtomicLH>;
189+
defm : AtomicLdPat<atomic_load_32, PseudoAtomicLW>;
190+
191+
defm : AtomicStPat<atomic_store_8, PseudoAtomicSB>;
192+
defm : AtomicStPat<atomic_store_16, PseudoAtomicSH>;
193+
defm : AtomicStPat<atomic_store_32, PseudoAtomicSW>;
123194
}
124195

125196
let Predicates = [HasAtomicLdSt, IsRV64] in {
126-
def : LdPat<atomic_load_64, LD, i64>;
127-
def : StPat<atomic_store_64, SD, GPR, i64>;
197+
def PseudoAtomicLD : PseudoAtomicLoad, Sched<[WriteLDD, ReadMemBase]>;
198+
def PseudoAtomicSD : PseudoAtomicStore, Sched<[WriteSTD, ReadStoreData, ReadMemBase]>;
199+
200+
defm : AtomicLdPat<atomic_load_64, PseudoAtomicLD>;
201+
defm : AtomicStPat<atomic_store_64, PseudoAtomicSD>;
128202
}
129203

130204
/// AMOs

0 commit comments

Comments
 (0)