AMDGPU: Remove ds atomic fadd intrinsics #95396
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

These have been replaced with atomicrmw fadd.

Patch is 69.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95396.diff

18 Files Affected:
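For readers skimming the diff, a minimal sketch (hypothetical value names) of the migration this patch performs: the intrinsic call form found in old bitcode, and the atomicrmw it is auto-upgraded to, matching the CHECK lines in the bitcode test below:

; Old intrinsic form, as it appears in existing bitcode:
%old = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)

; Canonical replacement after this patch:
%new = atomicrmw fadd ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4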
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d81cf40c912de..34d7e59ca45fd 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19084,7 +19084,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
EmitScalarExpr(E->getArg(3)), AO, SSID);
} else {
- // The ds_fadd_* builtins do not have syncscope/order arguments.
+ // The ds_atomic_fadd_* builtins do not have syncscope/order arguments.
SSID = llvm::SyncScope::System;
AO = AtomicOrdering::SequentiallyConsistent;
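Since the ds_atomic_fadd_* builtins carry no syncscope/order arguments, clang uses the defaults assigned in this hunk; a sketch of the resulting IR (names hypothetical; system scope is the default and prints without a syncscope qualifier):

%r = atomicrmw fadd ptr addrspace(3) %ptr, float %val seq_cst, align 4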
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 916ea04f71fdf..e71b61c5eda02 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -571,7 +571,6 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
-def int_amdgcn_ds_fadd : AMDGPULDSIntrin;
def int_amdgcn_ds_fmin : AMDGPULDSIntrin;
def int_amdgcn_ds_fmax : AMDGPULDSIntrin;
@@ -2970,10 +2969,6 @@ multiclass AMDGPUMFp8SmfmacIntrinsic<LLVMType DestTy> {
// bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm.
def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUAtomicRtn<llvm_v2i16_ty>;
def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUAtomicRtn<llvm_v2i16_ty>;
-def int_amdgcn_ds_fadd_v2bf16 : DefaultAttrsIntrinsic<
- [llvm_v2i16_ty],
- [LLVMQualPointerType<3>, llvm_v2i16_ty],
- [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
defset list<Intrinsic> AMDGPUMFMAIntrinsics940 = {
def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic<llvm_v4i32_ty, llvm_i64_ty>;
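For reference, the IR-level declarations behind the removed td definitions, copied from the bitcode test further below (note the v2bf16 variant used <2 x i16> and took only two arguments):

declare float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)
declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) nocapture, <2 x i16>)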
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 2f4b8351e747a..5b40fcf0d408e 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1033,6 +1033,12 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
break; // No other 'amdgcn.atomic.*'
}
+ if (Name.starts_with("ds.fadd")) {
+ // Replaced with atomicrmw fadd, so there's no new declaration.
+ NewFn = nullptr;
+ return true;
+ }
+
if (Name.starts_with("ldexp.")) {
// Target specific intrinsic became redundant
NewFn = Intrinsic::getDeclaration(
@@ -2331,40 +2337,74 @@ static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
+// These are expected to have the arguments:
+// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
+//
+// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
+//
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
Function *F, IRBuilder<> &Builder) {
- const bool IsInc = Name.starts_with("atomic.inc.");
- if (IsInc || Name.starts_with("atomic.dec.")) {
- if (CI->getNumOperands() != 6) // Malformed bitcode.
- return nullptr;
+ AtomicRMWInst::BinOp RMWOp =
+ StringSwitch<AtomicRMWInst::BinOp>(Name)
+ .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
+ .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
+ .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);
+
+ unsigned NumOperands = CI->getNumOperands();
+ if (NumOperands < 3) // Malformed bitcode.
+ return nullptr;
- AtomicRMWInst::BinOp RMWOp =
- IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
+ Value *Ptr = CI->getArgOperand(0);
+ if (!isa<PointerType>(Ptr->getType())) // Malformed.
+ return nullptr;
- Value *Ptr = CI->getArgOperand(0);
- Value *Val = CI->getArgOperand(1);
- ConstantInt *OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ Value *Val = CI->getArgOperand(1);
+ if (Val->getType() != CI->getType()) // Malformed.
+ return nullptr;
+
+ ConstantInt *OrderArg = nullptr;
+ bool IsVolatile = false;
+
+ // These should have 5 arguments (plus the callee). A separate version of the
+ // ds_fadd intrinsic was defined for bf16 which was missing arguments.
+ if (NumOperands > 3)
+ OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+
+ // Ignore scope argument at 3
+
+ if (NumOperands > 5) {
ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
+ IsVolatile = !VolatileArg || !VolatileArg->isZero();
+ }
- AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
- if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
- Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
- if (Order == AtomicOrdering::NotAtomic ||
- Order == AtomicOrdering::Unordered)
- Order = AtomicOrdering::SequentiallyConsistent;
-
- // The scope argument never really worked correctly. Use agent as the most
- // conservative option which should still always produce the instruction.
- SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID("agent");
- AtomicRMWInst *RMW =
- Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
-
- if (!VolatileArg || !VolatileArg->isZero())
- RMW->setVolatile(true);
- return RMW;
+ AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
+ if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
+ Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
+ if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
+ Order = AtomicOrdering::SequentiallyConsistent;
+
+ LLVMContext &Ctx = F->getContext();
+
+ // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
+ Type *RetTy = CI->getType();
+ if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
+ if (VT->getElementType()->isIntegerTy(16)) {
+ VectorType *AsBF16 =
+ VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
+ Val = Builder.CreateBitCast(Val, AsBF16);
+ }
}
- llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade.");
+ // The scope argument never really worked correctly. Use agent as the most
+ // conservative option which should still always produce the instruction.
+ SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
+ AtomicRMWInst *RMW =
+ Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
+
+ if (IsVolatile)
+ RMW->setVolatile(true);
+
+ return Builder.CreateBitCast(RMW, RetTy);
}
/// Helper to unwrap intrinsic call MetadataAsValue operands.
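To make the v2bf16 bitcast handling above concrete, a sketch (hypothetical names) of the upgrade for the old two-argument form:

; Old form, declared with <2 x i16> because bf16 predated an LLVM type:
%r = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %p, <2 x i16> %v)

; After upgrade: the operand is bitcast to <2 x bfloat>, the atomicrmw runs on
; the bfloat type, and the result is bitcast back to the call's original type.
; With no ordering operand, the conservative seq_cst/agent defaults apply:
%v.bf16 = bitcast <2 x i16> %v to <2 x bfloat>
%rmw = atomicrmw fadd ptr addrspace(3) %p, <2 x bfloat> %v.bf16 syncscope("agent") seq_cst, align 4
%r = bitcast <2 x bfloat> %rmw to <2 x i16>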
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 783bc9d7ef593..357b029d6ff0a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -627,7 +627,6 @@ defm int_amdgcn_global_atomic_fmin : noret_op;
defm int_amdgcn_global_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_csub : noret_op;
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
-defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
defm int_amdgcn_flat_atomic_fmax_num : noret_op;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 19f19eb7b7f44..7205442eb184a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -5398,8 +5398,6 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI,
static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) {
switch (IID) {
- case Intrinsic::amdgcn_ds_fadd:
- return AMDGPU::G_ATOMICRMW_FADD;
case Intrinsic::amdgcn_ds_fmin:
return AMDGPU::G_AMDGPU_ATOMIC_FMIN;
case Intrinsic::amdgcn_ds_fmax:
@@ -7335,7 +7333,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return legalizeBufferAtomic(MI, B, IntrID);
case Intrinsic::amdgcn_rsq_clamp:
return legalizeRsqClampIntrinsic(MI, MRI, B);
- case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax:
return legalizeDSAtomicFPIntrinsic(Helper, MI, IntrID);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 7ebd674757fbc..0d2fb811306bb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4907,8 +4907,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_global_load_tr_b128:
return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_ds_ordered_add:
- case Intrinsic::amdgcn_ds_ordered_swap:
- case Intrinsic::amdgcn_ds_fadd_v2bf16: {
+ case Intrinsic::amdgcn_ds_ordered_swap: {
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index e84d39a2895c8..d9c86d2140702 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -252,10 +252,8 @@ def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin_num>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax_num>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fadd_v2bf16>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd_v2bf16>;
-def : SourceOfDivergence<int_amdgcn_ds_fadd>;
def : SourceOfDivergence<int_amdgcn_ds_fmin>;
def : SourceOfDivergence<int_amdgcn_ds_fmax>;
-def : SourceOfDivergence<int_amdgcn_ds_fadd_v2bf16>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 437e01c37c6b6..1192b49fd1f08 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -502,7 +502,6 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
switch (Inst->getIntrinsicID()) {
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
- case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
@@ -1019,7 +1018,6 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const {
switch (IID) {
- case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax:
case Intrinsic::amdgcn_is_shared:
@@ -1041,7 +1039,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
Value *NewV) const {
auto IntrID = II->getIntrinsicID();
switch (IntrID) {
- case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4));
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index e22f8b7d46171..219246b71fe80 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1142,16 +1142,6 @@ def : DSAtomicRetPatIntrinsic<DS_ADD_F64, f64, int_amdgcn_flat_atomic_fadd_noret
let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_F16, DS_PK_ADD_F16, v2f16, "atomic_load_fadd">;
-
-def : GCNPat <
- (v2i16 (int_amdgcn_ds_fadd_v2bf16 i32:$ptr, v2i16:$src)),
- (DS_PK_ADD_RTN_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
->;
-let AddedComplexity = 1 in
-def : GCNPat <
- (v2i16 (int_amdgcn_ds_fadd_v2bf16_noret i32:$ptr, v2i16:$src)),
- (DS_PK_ADD_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
->;
} // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
let OtherPredicates = [HasGDS] in
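With these bf16 GCNPats removed, selection of ds_pk_add_bf16 / ds_pk_add_rtn_bf16 presumably comes from atomicrmw patterns elsewhere in the full (truncated) patch; the IR reaching instruction selection would now look like:

%r = atomicrmw fadd ptr addrspace(3) %ptr, <2 x bfloat> %val syncscope("agent") seq_cst, align 4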
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f59c7a73b18af..c821e3a95e9ae 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1271,7 +1271,6 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
switch (IntrID) {
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
- case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
@@ -1442,7 +1441,6 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
case Intrinsic::amdgcn_atomic_cond_sub_u32:
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume:
- case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmax:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_ordered_add:
@@ -8690,19 +8688,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
M->getVTList(), Ops, M->getMemoryVT(),
M->getMemOperand());
}
- case Intrinsic::amdgcn_ds_fadd: {
- MemSDNode *M = cast<MemSDNode>(Op);
- unsigned Opc;
- switch (IntrID) {
- case Intrinsic::amdgcn_ds_fadd:
- Opc = ISD::ATOMIC_LOAD_FADD;
- break;
- }
-
- return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(),
- M->getOperand(0), M->getOperand(2), M->getOperand(3),
- M->getMemOperand());
- }
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
MemSDNode *M = cast<MemSDNode>(Op);
diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll
index 2e6286a7df8df..311bd8863859b 100644
--- a/llvm/test/Bitcode/amdgcn-atomic.ll
+++ b/llvm/test/Bitcode/amdgcn-atomic.ll
@@ -112,4 +112,140 @@ declare i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) nocapture, i64, i32
declare i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) nocapture, i64, i32 immarg, i32 immarg, i1 immarg) #0
declare i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr nocapture, i64, i32 immarg, i32 immarg, i1 immarg) #0
+; (ptr, rmw_value, ordering, scope, isVolatile)
+declare float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)
+declare double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) nocapture, double, i32 immarg, i32 immarg, i1 immarg)
+declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) nocapture, <2 x half>, i32 immarg, i32 immarg, i1 immarg)
+declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) nocapture, <2 x i16>)
+
+define float @upgrade_amdgcn_ds_fadd_f32(ptr addrspace(3) %ptr, float %val) {
+ ; CHECK: atomicrmw fadd ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
+ %result0 = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
+
+ ; CHECK: = atomicrmw volatile fadd ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
+ %result1 = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true)
+
+ ; CHECK: = atomicrmw fadd ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
+ %result2 = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %ptr, float %val, i32 43, i32 3, i1 false)
+
+ ; CHECK: = atomicrmw fadd ptr addrspace(3) %ptr, float %val syncscope("agent") acquire, align 4
+ %result3 = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %ptr, float %val, i32 4, i32 2, i1 false)
+
+ ret float %result3
+}
+
+; Handle missing type suffix
+declare float @llvm.amdgcn.ds.fadd(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)
+
+define float @upgrade_amdgcn_ds_fadd_f32_no_suffix(ptr addrspace(3) %ptr, float %val) {
+ ; CHECK: atomicrmw fadd ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
+ %result0 = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
+ ret float %result0
+}
+
+define void @immarg_violations_ds_fadd_f32(ptr addrspace(3) %ptr, float %fval, i32 %val32, i1 %val1) {
+ ; CHECK: = atomicrmw volatile fadd ptr addrspace(3) %ptr, float %fval syncscope("agent") seq_cst, align 4
+ %result0 = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %ptr, float %fval, i32 %val32, i32 %val32, i1 %val1)
+ ret void
+}
+
+declare float @llvm.amdgcn.ds.fadd.f32broken0(i32, float, i32 immarg, i32 immarg, i1 immarg)
+
+; This will just delete the invalid call, which isn't ideal, but these
+; cases were never emitted.
+; CHECK-LABEL: define void @ds_fadd_f32_invalid_not_ptr(
+; CHECK-NEXT: ret void
+define void @ds_fadd_f32_invalid_not_ptr(i32 %ptr, float %fval) {
+ %result0 = call float @llvm.amdgcn.ds.fadd.f32broken0(i32 %ptr, float %fval, i32 0, i32 0, i1 false)
+ ret void
+}
+
+declare float @llvm.amdgcn.ds.fadd.f32broken1(ptr addrspace(3), double, i32 immarg, i32 immarg, i1 immarg)
+
+; CHECK-LABEL: define void @ds_fadd_f32_invalid_mismatch(
+; CHECK-NEXT: ret void
+define void @ds_fadd_f32_invalid_mismatch(ptr addrspace(3) %ptr, double %fval) {
+ %result0 = call float @llvm.amdgcn.ds.fadd.f32broken1(ptr addrspace(3) %ptr, double %fval, i32 0, i32 0, i1 false)
+ ret void
+}
+
+define double @upgrade_amdgcn_ds_fadd_f64(ptr addrspace(3) %ptr, double %val) {
+ ; CHECK: atomicrmw fadd ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
+ %result0 = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %val, i32 0, i32 0, i1 false)
+
+ ; CHECK: = atomicrmw volatile fadd ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
+ %result1 = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %val, i32 0, i32 0, i1 true)
+
+ ; CHECK: = atomicrmw fadd ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
+ %result2 = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %val, i32 43, i32 3, i1 false)
+
+ ; CHECK: = atomicrmw fadd ptr addrspace(3) %ptr, double %val syncscope("agent") acquire, align 8
+ %result3 = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %val, i32 4, i32 2, i1 false)
+
+ ret double %result3
+}
+
+; CHECK-LABEL: @immarg_violations_ds_fadd_f64(
+define void @immarg_violations_ds_fadd_f64(ptr addrspace(3) %ptr, double %fval, i32 %val32, i1 %val1) {
+ ; CHECK: = atomicrmw volatile fadd ptr addrspace(3) %ptr, double %fval syncscope("agent") seq_cst, align 8
+ %result0 = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %fval, i32 %val32, i32 %val32, i1 %val1)
+ ret void
+}
+
+define <2 x half> @upgrade_amdgcn_ds_fadd_v2f16(ptr addrspace(3) %ptr, <2 x half> %val) {
+ ; CHECK: atomicrmw fadd ptr addrspace(3) %ptr, <2 x half> %val syncscope("agent") seq_cst, align 4
+ %result0 = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %val, i32 0, i32 0, i1 false)
+
+ ; CHECK: = atomicrmw volatile fadd ptr addrspace(3) %ptr, <2 x half> %val syncscope("agent") seq_cst, align 4
+ %result1 = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %val, i32 0, i32 0, i1 true)
+
+ ; CHECK: = atomicrmw fadd ptr addrspace(3) %ptr, <2 x half> %val syncscope("agent") seq_cst, align 4
+ %result2 = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %val, i32 43, i32 3, i1 false)
+
+ ; CHECK: = atomicrmw fadd ptr addrspace(3) %ptr, <2 x half> %val syncscope("agent") acquire, align 4
+ %result3 = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %val, i32 4, i32 2, i1 false)
+
+ ret <2 x half> %result3
+}
+
+define void @immarg_violations_ds_fadd_v2f16(ptr addrspace(3) %ptr, <2 x half> %fval, i32 %val32, i1 %val1) {
+ ; CHECK: = atomicrmw volatile fadd ptr addrspace(3) %ptr, <2 x half> %fval syncscope("agent") seq_cst, align 4...
[truncated]
LGTM, contingent on the plan to produce atomicrmw.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/387