Skip to content

Commit f0f8e09

Browse files
committed
AMDGPU: Remove ds atomic fadd intrinsics
These have been replaced with atomicrmw fadd
1 parent 625fc4b commit f0f8e09

18 files changed

+255
-654
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19084,7 +19084,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1908419084
ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
1908519085
EmitScalarExpr(E->getArg(3)), AO, SSID);
1908619086
} else {
19087-
// The ds_fadd_* builtins do not have syncscope/order arguments.
19087+
// The ds_atomic_fadd_* builtins do not have syncscope/order arguments.
1908819088
SSID = llvm::SyncScope::System;
1908919089
AO = AtomicOrdering::SequentiallyConsistent;
1909019090

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -571,7 +571,6 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
571571
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
572572
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
573573

574-
def int_amdgcn_ds_fadd : AMDGPULDSIntrin;
575574
def int_amdgcn_ds_fmin : AMDGPULDSIntrin;
576575
def int_amdgcn_ds_fmax : AMDGPULDSIntrin;
577576

@@ -2930,10 +2929,6 @@ multiclass AMDGPUMFp8SmfmacIntrinsic<LLVMType DestTy> {
29302929
// bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm.
29312930
def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUAtomicRtn<llvm_v2i16_ty>;
29322931
def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUAtomicRtn<llvm_v2i16_ty>;
2933-
def int_amdgcn_ds_fadd_v2bf16 : DefaultAttrsIntrinsic<
2934-
[llvm_v2i16_ty],
2935-
[LLVMQualPointerType<3>, llvm_v2i16_ty],
2936-
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
29372932

29382933
defset list<Intrinsic> AMDGPUMFMAIntrinsics940 = {
29392934
def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic<llvm_v4i32_ty, llvm_i64_ty>;

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 66 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1033,6 +1033,12 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
10331033
break; // No other 'amdgcn.atomic.*'
10341034
}
10351035

1036+
if (Name.starts_with("ds.fadd")) {
1037+
// Replaced with atomicrmw fadd, so there's no new declaration.
1038+
NewFn = nullptr;
1039+
return true;
1040+
}
1041+
10361042
if (Name.starts_with("ldexp.")) {
10371043
// Target specific intrinsic became redundant
10381044
NewFn = Intrinsic::getDeclaration(
@@ -2331,40 +2337,74 @@ static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
23312337
llvm_unreachable("Unknown function for ARM CallBase upgrade.");
23322338
}
23332339

2340+
// These are expected to have the arguments:
2341+
// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
2342+
//
2343+
// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
2344+
//
23342345
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
23352346
Function *F, IRBuilder<> &Builder) {
2336-
const bool IsInc = Name.starts_with("atomic.inc.");
2337-
if (IsInc || Name.starts_with("atomic.dec.")) {
2338-
if (CI->getNumOperands() != 6) // Malformed bitcode.
2339-
return nullptr;
2347+
AtomicRMWInst::BinOp RMWOp =
2348+
StringSwitch<AtomicRMWInst::BinOp>(Name)
2349+
.StartsWith("ds.fadd", AtomicRMWInst::FAdd)
2350+
.StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
2351+
.StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);
2352+
2353+
unsigned NumOperands = CI->getNumOperands();
2354+
if (NumOperands < 3) // Malformed bitcode.
2355+
return nullptr;
23402356

2341-
AtomicRMWInst::BinOp RMWOp =
2342-
IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
2357+
Value *Ptr = CI->getArgOperand(0);
2358+
if (!isa<PointerType>(Ptr->getType())) // Malformed.
2359+
return nullptr;
23432360

2344-
Value *Ptr = CI->getArgOperand(0);
2345-
Value *Val = CI->getArgOperand(1);
2346-
ConstantInt *OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
2361+
Value *Val = CI->getArgOperand(1);
2362+
if (Val->getType() != CI->getType()) // Malformed.
2363+
return nullptr;
2364+
2365+
ConstantInt *OrderArg = nullptr;
2366+
bool IsVolatile = false;
2367+
2368+
// These should have 5 arguments (plus the callee). A separate version of the
2369+
// ds_fadd intrinsic was defined for bf16 which was missing arguments.
2370+
if (NumOperands > 3)
2371+
OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
2372+
2373+
// Ignore scope argument at 3
2374+
2375+
if (NumOperands > 5) {
23472376
ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
2377+
IsVolatile = !VolatileArg || !VolatileArg->isZero();
2378+
}
23482379

2349-
AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
2350-
if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
2351-
Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
2352-
if (Order == AtomicOrdering::NotAtomic ||
2353-
Order == AtomicOrdering::Unordered)
2354-
Order = AtomicOrdering::SequentiallyConsistent;
2355-
2356-
// The scope argument never really worked correctly. Use agent as the most
2357-
// conservative option which should still always produce the instruction.
2358-
SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID("agent");
2359-
AtomicRMWInst *RMW =
2360-
Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
2361-
2362-
if (!VolatileArg || !VolatileArg->isZero())
2363-
RMW->setVolatile(true);
2364-
return RMW;
2380+
AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
2381+
if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
2382+
Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
2383+
if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
2384+
Order = AtomicOrdering::SequentiallyConsistent;
2385+
2386+
LLVMContext &Ctx = F->getContext();
2387+
2388+
// Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
2389+
Type *RetTy = CI->getType();
2390+
if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
2391+
if (VT->getElementType()->isIntegerTy(16)) {
2392+
VectorType *AsBF16 =
2393+
VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
2394+
Val = Builder.CreateBitCast(Val, AsBF16);
2395+
}
23652396
}
23662397

2367-
llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade.");
2398+
// The scope argument never really worked correctly. Use agent as the most
2399+
// conservative option which should still always produce the instruction.
2400+
SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
2401+
AtomicRMWInst *RMW =
2402+
Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
2403+
2404+
if (IsVolatile)
2405+
RMW->setVolatile(true);
2406+
2407+
return Builder.CreateBitCast(RMW, RetTy);
23682408
}
23692409

23702410
/// Helper to unwrap intrinsic call MetadataAsValue operands.

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -630,7 +630,6 @@ defm int_amdgcn_global_atomic_fmin : noret_op;
630630
defm int_amdgcn_global_atomic_fmax : noret_op;
631631
defm int_amdgcn_global_atomic_csub : noret_op;
632632
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
633-
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
634633
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
635634
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
636635
defm int_amdgcn_flat_atomic_fmax_num : noret_op;

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5403,8 +5403,6 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI,
54035403

54045404
static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) {
54055405
switch (IID) {
5406-
case Intrinsic::amdgcn_ds_fadd:
5407-
return AMDGPU::G_ATOMICRMW_FADD;
54085406
case Intrinsic::amdgcn_ds_fmin:
54095407
return AMDGPU::G_ATOMICRMW_FMIN;
54105408
case Intrinsic::amdgcn_ds_fmax:
@@ -7333,7 +7331,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73337331
return legalizeBufferAtomic(MI, B, IntrID);
73347332
case Intrinsic::amdgcn_rsq_clamp:
73357333
return legalizeRsqClampIntrinsic(MI, MRI, B);
7336-
case Intrinsic::amdgcn_ds_fadd:
73377334
case Intrinsic::amdgcn_ds_fmin:
73387335
case Intrinsic::amdgcn_ds_fmax:
73397336
return legalizeDSAtomicFPIntrinsic(Helper, MI, IntrID);

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4905,8 +4905,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
49054905
case Intrinsic::amdgcn_global_load_tr_b128:
49064906
return getDefaultMappingAllVGPR(MI);
49074907
case Intrinsic::amdgcn_ds_ordered_add:
4908-
case Intrinsic::amdgcn_ds_ordered_swap:
4909-
case Intrinsic::amdgcn_ds_fadd_v2bf16: {
4908+
case Intrinsic::amdgcn_ds_ordered_swap: {
49104909
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
49114910
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
49124911
unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -252,10 +252,8 @@ def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin_num>;
252252
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax_num>;
253253
def : SourceOfDivergence<int_amdgcn_global_atomic_fadd_v2bf16>;
254254
def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd_v2bf16>;
255-
def : SourceOfDivergence<int_amdgcn_ds_fadd>;
256255
def : SourceOfDivergence<int_amdgcn_ds_fmin>;
257256
def : SourceOfDivergence<int_amdgcn_ds_fmax>;
258-
def : SourceOfDivergence<int_amdgcn_ds_fadd_v2bf16>;
259257
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;
260258
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>;
261259
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>;

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -502,7 +502,6 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
502502
switch (Inst->getIntrinsicID()) {
503503
case Intrinsic::amdgcn_ds_ordered_add:
504504
case Intrinsic::amdgcn_ds_ordered_swap:
505-
case Intrinsic::amdgcn_ds_fadd:
506505
case Intrinsic::amdgcn_ds_fmin:
507506
case Intrinsic::amdgcn_ds_fmax: {
508507
auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
@@ -1019,7 +1018,6 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
10191018
bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
10201019
Intrinsic::ID IID) const {
10211020
switch (IID) {
1022-
case Intrinsic::amdgcn_ds_fadd:
10231021
case Intrinsic::amdgcn_ds_fmin:
10241022
case Intrinsic::amdgcn_ds_fmax:
10251023
case Intrinsic::amdgcn_is_shared:
@@ -1041,7 +1039,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
10411039
Value *NewV) const {
10421040
auto IntrID = II->getIntrinsicID();
10431041
switch (IntrID) {
1044-
case Intrinsic::amdgcn_ds_fadd:
10451042
case Intrinsic::amdgcn_ds_fmin:
10461043
case Intrinsic::amdgcn_ds_fmax: {
10471044
const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4));

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1142,16 +1142,6 @@ def : DSAtomicRetPatIntrinsic<DS_ADD_F64, f64, int_amdgcn_flat_atomic_fadd_noret
11421142

11431143
let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
11441144
defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_F16, DS_PK_ADD_F16, v2f16, "atomic_load_fadd">;
1145-
1146-
def : GCNPat <
1147-
(v2i16 (int_amdgcn_ds_fadd_v2bf16 i32:$ptr, v2i16:$src)),
1148-
(DS_PK_ADD_RTN_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
1149-
>;
1150-
let AddedComplexity = 1 in
1151-
def : GCNPat <
1152-
(v2i16 (int_amdgcn_ds_fadd_v2bf16_noret i32:$ptr, v2i16:$src)),
1153-
(DS_PK_ADD_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
1154-
>;
11551145
} // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
11561146

11571147
let OtherPredicates = [HasGDS] in

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1280,7 +1280,6 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12801280
switch (IntrID) {
12811281
case Intrinsic::amdgcn_ds_ordered_add:
12821282
case Intrinsic::amdgcn_ds_ordered_swap:
1283-
case Intrinsic::amdgcn_ds_fadd:
12841283
case Intrinsic::amdgcn_ds_fmin:
12851284
case Intrinsic::amdgcn_ds_fmax: {
12861285
Info.opc = ISD::INTRINSIC_W_CHAIN;
@@ -1451,7 +1450,6 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
14511450
case Intrinsic::amdgcn_atomic_cond_sub_u32:
14521451
case Intrinsic::amdgcn_ds_append:
14531452
case Intrinsic::amdgcn_ds_consume:
1454-
case Intrinsic::amdgcn_ds_fadd:
14551453
case Intrinsic::amdgcn_ds_fmax:
14561454
case Intrinsic::amdgcn_ds_fmin:
14571455
case Intrinsic::amdgcn_ds_ordered_add:
@@ -8700,19 +8698,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
87008698
M->getVTList(), Ops, M->getMemoryVT(),
87018699
M->getMemOperand());
87028700
}
8703-
case Intrinsic::amdgcn_ds_fadd: {
8704-
MemSDNode *M = cast<MemSDNode>(Op);
8705-
unsigned Opc;
8706-
switch (IntrID) {
8707-
case Intrinsic::amdgcn_ds_fadd:
8708-
Opc = ISD::ATOMIC_LOAD_FADD;
8709-
break;
8710-
}
8711-
8712-
return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(),
8713-
M->getOperand(0), M->getOperand(2), M->getOperand(3),
8714-
M->getMemOperand());
8715-
}
87168701
case Intrinsic::amdgcn_ds_fmin:
87178702
case Intrinsic::amdgcn_ds_fmax: {
87188703
MemSDNode *M = cast<MemSDNode>(Op);

0 commit comments

Comments
 (0)