Skip to content

Commit b423e1f

Browse files
committed
[SDAG][RISCV] Avoid neg instructions when lowering atomic_load_sub with a constant rhs
This patch avoids creating (sub x0, rhs) when lowering atomic_load_sub with a constant rhs.
Comparison with GCC: https://godbolt.org/z/c5zPdP7j4
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D158673
1 parent f3796ac commit b423e1f

File tree

9 files changed

+245
-76
lines changed

9 files changed

+245
-76
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3133,6 +3133,23 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
31333133
Results.push_back(Res.getValue(1));
31343134
break;
31353135
}
3136+
// Expansion of ATOMIC_LOAD_SUB: the node is rewritten as an ATOMIC_LOAD_ADD
// whose value operand is (0 - RHS).
case ISD::ATOMIC_LOAD_SUB: {
3137+
SDLoc DL(Node);
3138+
EVT VT = Node->getValueType(0);
3139+
// Operand 2 is the value that gets negated below.
SDValue RHS = Node->getOperand(2);
3140+
AtomicSDNode *AN = cast<AtomicSDNode>(Node);
3141+
// If RHS is a SIGN_EXTEND_INREG whose extension type equals the atomic's
// memory type, look through it and negate its input directly.
// NOTE(review): presumably safe because only the memory-type-wide bits of
// the operand matter to the atomic operation - confirm.
if (RHS->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3142+
cast<VTSDNode>(RHS->getOperand(1))->getVT() == AN->getMemoryVT())
3143+
RHS = RHS->getOperand(0);
3144+
// NewRHS = 0 - RHS, built as a SUB node in the result type VT.
SDValue NewRHS =
3145+
DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
3146+
// Build the ATOMIC_LOAD_ADD with the original node's memory VT, operands 0
// and 1, and memory operand; only the value operand changes.
SDValue Res = DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, AN->getMemoryVT(),
3147+
Node->getOperand(0), Node->getOperand(1),
3148+
NewRHS, AN->getMemOperand());
3149+
// Push result 0 and result 1 of the new node onto Results.
Results.push_back(Res);
3150+
Results.push_back(Res.getValue(1));
3151+
break;
3152+
}
31363153
case ISD::DYNAMIC_STACKALLOC:
31373154
ExpandDYNAMIC_STACKALLOC(Node, Results);
31383155
break;

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -795,8 +795,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
795795
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
796796

797797
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
798-
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
799-
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
798+
if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
799+
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
800+
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, LibCall);
801+
} else {
802+
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
803+
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
804+
}
800805
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
801806
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
802807

@@ -6113,8 +6118,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
61136118
case ISD::VECREDUCE_FMAXIMUM:
61146119
case ISD::VECREDUCE_FMINIMUM:
61156120
return LowerVECREDUCE(Op, DAG);
6116-
case ISD::ATOMIC_LOAD_SUB:
6117-
return LowerATOMIC_LOAD_SUB(Op, DAG);
61186121
case ISD::ATOMIC_LOAD_AND:
61196122
return LowerATOMIC_LOAD_AND(Op, DAG);
61206123
case ISD::DYNAMIC_STACKALLOC:
@@ -13748,23 +13751,6 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
1374813751
}
1374913752
}
1375013753

13751-
// Custom lowering hook for ISD::ATOMIC_LOAD_SUB (shown in this diff as removed
// lines). Returns an empty SDValue when the subtarget has neither LSE nor
// outlined atomics; otherwise returns an ATOMIC_LOAD_ADD of (0 - RHS) that
// reuses the original node's memory VT, operands 0 and 1, and memory operand.
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
13752-
SelectionDAG &DAG) const {
13753-
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
13754-
// Without LSE or outlined atomics, return an empty SDValue instead of
// rewriting the node.
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
13755-
return SDValue();
13756-
13757-
// LSE has an atomic load-add instruction, but not a load-sub.
13758-
SDLoc dl(Op);
13759-
MVT VT = Op.getSimpleValueType();
13760-
SDValue RHS = Op.getOperand(2);
13761-
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
13762-
// Negate the operand: RHS = 0 - RHS.
RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
13763-
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
13764-
Op.getOperand(0), Op.getOperand(1), RHS,
13765-
AN->getMemOperand());
13766-
}
13767-
1376813754
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
1376913755
SelectionDAG &DAG) const {
1377013756
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1097,7 +1097,6 @@ class AArch64TargetLowering : public TargetLowering {
10971097
SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
10981098
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
10991099
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
1100-
SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
11011100
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
11021101
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
11031102
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1349,19 +1349,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
13491349
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
13501350
Subtarget->hasAnyDataBarrier() ? Custom : Expand);
13511351

1352-
// Set them all for expansion, which will force libcalls.
1353-
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
1354-
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
1355-
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
1356-
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1357-
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
1358-
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
1359-
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
1360-
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
1361-
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
1362-
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
1363-
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
1364-
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
1352+
// Mark them all as LibCall so that they are lowered to libcalls.
1353+
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
1354+
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
1355+
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
1356+
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
1357+
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall);
1358+
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
1359+
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
1360+
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall);
1361+
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall);
1362+
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall);
1363+
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall);
1364+
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall);
13651365
// Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
13661366
// Unordered/Monotonic case.
13671367
if (!InsertFencesForAtomic) {

llvm/lib/Target/Mips/Mips16ISelLowering.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -127,19 +127,19 @@ Mips16TargetLowering::Mips16TargetLowering(const MipsTargetMachine &TM,
127127
if (!Subtarget.useSoftFloat())
128128
setMips16HardFloatLibCalls();
129129

130-
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
131-
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
132-
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
133-
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
134-
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
135-
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
136-
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
137-
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
138-
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
139-
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
140-
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
141-
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
142-
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
130+
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, LibCall);
131+
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
132+
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
133+
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
134+
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
135+
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall);
136+
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
137+
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
138+
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall);
139+
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall);
140+
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall);
141+
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall);
142+
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall);
143143

144144
setOperationAction(ISD::ROTR, MVT::i32, Expand);
145145
setOperationAction(ISD::ROTR, MVT::i64, Expand);

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1229,14 +1229,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
12291229
}
12301230
}
12311231

1232+
if (Subtarget.hasStdExtA())
1233+
setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1234+
12321235
if (Subtarget.hasForcedAtomics()) {
1233-
// Set atomic rmw/cas operations to expand to force __sync libcalls.
1236+
// Force __sync libcalls to be emitted for atomic rmw/cas operations.
12341237
setOperationAction(
12351238
{ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
12361239
ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
12371240
ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
12381241
ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1239-
XLenVT, Expand);
1242+
XLenVT, LibCall);
12401243
}
12411244

12421245
if (Subtarget.hasVendorXTHeadMemIdx()) {

llvm/lib/Target/RISCV/RISCVInstrInfoA.td

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -168,17 +168,6 @@ defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
168168
defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
169169
defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
170170

171-
def : Pat<(XLenVT (atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr)),
172-
(AMOADD_W GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
173-
def : Pat<(XLenVT (atomic_load_sub_32_acquire GPR:$addr, GPR:$incr)),
174-
(AMOADD_W_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
175-
def : Pat<(XLenVT (atomic_load_sub_32_release GPR:$addr, GPR:$incr)),
176-
(AMOADD_W_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
177-
def : Pat<(XLenVT (atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr)),
178-
(AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
179-
def : Pat<(XLenVT (atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr)),
180-
(AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
181-
182171
/// Pseudo AMOs
183172

184173
class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
@@ -338,19 +327,6 @@ defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64>;
338327
defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64>;
339328
defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64>;
340329

341-
/// 64-bit AMOs
342-
343-
def : Pat<(i64 (atomic_load_sub_64_monotonic GPR:$addr, GPR:$incr)),
344-
(AMOADD_D GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
345-
def : Pat<(i64 (atomic_load_sub_64_acquire GPR:$addr, GPR:$incr)),
346-
(AMOADD_D_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
347-
def : Pat<(i64 (atomic_load_sub_64_release GPR:$addr, GPR:$incr)),
348-
(AMOADD_D_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
349-
def : Pat<(i64 (atomic_load_sub_64_acq_rel GPR:$addr, GPR:$incr)),
350-
(AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
351-
def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)),
352-
(AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
353-
354330
/// 64-bit pseudo AMOs
355331

356332
let Size = 20 in

llvm/test/CodeGen/Mips/atomicops.ll

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,15 @@ entry:
1212
; 16: lw ${{[0-9]+}}, %call16(__sync_fetch_and_add_4)(${{[0-9]+}})
1313
}
1414

15+
define i32 @atomic_load_sub(ptr %mem, i32 %val, i32 %c) nounwind {
16+
; 16-LABEL: atomic_load_sub:
17+
; 16: lw ${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}})
18+
; 16: lw ${{[0-9]+}}, %call16(__sync_fetch_and_sub_4)(${{[0-9]+}})
19+
entry:
20+
%0 = atomicrmw sub ptr %mem, i32 %val seq_cst
21+
ret i32 %0
22+
}
23+
1524
define i32 @main() nounwind {
1625
entry:
1726
%x = alloca i32, align 4
@@ -37,5 +46,3 @@ entry:
3746
}
3847

3948
declare i32 @printf(ptr nocapture, ...) nounwind
40-
41-

0 commit comments

Comments
 (0)