Skip to content

[AMDGPU] Add an option to disable unsafe uses of atomic xor #69229

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/TargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,9 @@ class TargetInfo : public TransferrableTargetInfo,
LLVM_PREFERRED_TYPE(bool)
unsigned ARMCDECoprocMask : 8;

LLVM_PREFERRED_TYPE(bool)
unsigned AllowAMDGPUFineGrainedMem : 1;

unsigned MaxOpenCLWorkGroupSize;

std::optional<unsigned> MaxBitIntWidth;
Expand Down Expand Up @@ -1009,6 +1012,9 @@ class TargetInfo : public TransferrableTargetInfo,
/// allowed.
bool allowAMDGPUUnsafeFPAtomics() const { return AllowAMDGPUUnsafeFPAtomics; }

/// Returns whether or not fine-grained memory access is allowed on AMDGPU.
/// This reflects the AllowAMDGPUFineGrainedMem bit-field, which the base
/// TargetInfo constructor initializes to false.
bool allowAMDGPUFineGrainedMem() const { return AllowAMDGPUFineGrainedMem; }

/// For ARM targets returns a mask defining which coprocessors are configured
/// as Custom Datapath.
uint32_t getARMCDECoprocMask() const { return ARMCDECoprocMask; }
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/TargetOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ class TargetOptions {
/// \brief If enabled, allow AMDGPU unsafe floating point atomics.
bool AllowAMDGPUUnsafeFPAtomics = false;

/// \brief If enabled, allow fine-grained memory access on AMDGPU.
bool AllowAMDGPUFineGrainedMem = false;

/// \brief Code object version for AMDGPU.
llvm::CodeObjectVersionKind CodeObjectVersion =
llvm::CodeObjectVersionKind::COV_None;
Expand Down
10 changes: 10 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -4749,6 +4749,16 @@ defm unsafe_fp_atomics : BoolOption<"m", "unsafe-fp-atomics",

def faltivec : Flag<["-"], "faltivec">, Group<f_Group>;
def fno_altivec : Flag<["-"], "fno-altivec">, Group<f_Group>;

defm amdgpu_fine_grained_mem : BoolOption<"m", "amdgpu-fine-grained-mem",
TargetOpts<"AllowAMDGPUFineGrainedMem">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CC1Option],
"Indicates that fine-grained memory allocations may be accessed in the "
"kernel. This may result in certain atomic operations being replaced "
"in order to guarantee correct operation when fine-grained memory "
"allocations are used. (AMDGPU only)">,
NegFlag<SetFalse, [], [ClangOption, CC1Option]>>, Group<m_Group>;

let Flags = [TargetSpecific] in {
def maltivec : Flag<["-"], "maltivec">, Group<m_ppc_Features_Group>,
HelpText<"Enable AltiVec vector initializer syntax">;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/TargetInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
HasAArch64SVETypes = false;
HasRISCVVTypes = false;
AllowAMDGPUUnsafeFPAtomics = false;
AllowAMDGPUFineGrainedMem = false;
ARMCDECoprocMask = 0;

// Default to no types using fpret.
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
HasFloat16 = true;
WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
AllowAMDGPUFineGrainedMem = Opts.AllowAMDGPUFineGrainedMem;

// Set pointer width and alignment for the generic address space.
PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
Expand Down
23 changes: 23 additions & 0 deletions clang/lib/CodeGen/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,26 @@ void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
}
}

// Add metadata to specific atomic instructions, to mark them as potentially
// accessing fine-grained memory locations.
//
// Every `atomicrmw xor` found in F is tagged with `!amdgpu.atomic` metadata of
// the form
//   !{!{!"fine_grained", i32 1}}
// Any `amdgpu.atomic` metadata already attached to such an instruction is
// replaced. Only instructions present in F's body at the time of the call are
// visited.
static void addFineGrainedAtomicMD(llvm::Function *F) {
  // The annotation is identical for every tagged instruction, so it is built
  // once, and only when the first matching instruction is found. Functions
  // without an `atomicrmw xor` therefore create no metadata at all.
  llvm::MDNode *FineGrainedMD = nullptr;

  for (llvm::BasicBlock &BB : *F) {
    for (llvm::Instruction &I : BB) {
      auto *RMW = llvm::dyn_cast<llvm::AtomicRMWInst>(&I);
      if (!RMW || RMW->getOperation() != llvm::AtomicRMWInst::Xor)
        continue;

      if (!FineGrainedMD) {
        llvm::LLVMContext &Ctx = F->getContext();
        llvm::MDBuilder MDHelper(Ctx);
        auto *Int32Ty = llvm::IntegerType::getInt32Ty(Ctx);
        auto *Key = MDHelper.createString("fine_grained");
        auto *One =
            MDHelper.createConstant(llvm::ConstantInt::get(Int32Ty, 1));
        llvm::MDNode *Entry = llvm::MDNode::get(Ctx, {Key, One});
        FineGrainedMD = llvm::MDNode::get(Ctx, {Entry});
      }

      RMW->setMetadata("amdgpu.atomic", FineGrainedMD);
    }
  }
}

void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (requiresAMDGPUProtectedVisibility(D, GV)) {
Expand All @@ -413,6 +433,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");

if (M.getContext().getTargetInfo().allowAMDGPUFineGrainedMem())
addFineGrainedAtomicMD(F);

if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
F->addFnAttr("amdgpu-ieee", "false");
}
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7346,6 +7346,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,

Args.addOptInFlag(CmdArgs, options::OPT_munsafe_fp_atomics,
options::OPT_mno_unsafe_fp_atomics);
Args.addOptInFlag(CmdArgs, options::OPT_mamdgpu_fine_grained_mem,
options::OPT_mno_amdgpu_fine_grained_mem);
Args.addOptOutFlag(CmdArgs, options::OPT_mamdgpu_ieee,
options::OPT_mno_amdgpu_ieee);
}
Expand Down
62 changes: 62 additions & 0 deletions clang/test/CodeGenOpenCL/amdgpu-unsafe-int-atomics.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// REQUIRES: amdgpu-registered-target
//
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
// RUN: -emit-llvm \
// RUN: | FileCheck -check-prefixes=COMMON,UNSAFE-INT-DEFAULT %s
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
// RUN: -emit-llvm -mamdgpu-fine-grained-mem \
// RUN: | FileCheck -check-prefixes=COMMON,UNSAFE-INT-ON %s
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
// RUN: -emit-llvm -mno-amdgpu-fine-grained-mem \
// RUN: | FileCheck -check-prefixes=COMMON,UNSAFE-INT-OFF %s

// Check AMDGCN ISA generation.

// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
// RUN: | FileCheck -check-prefixes=COMMON-ISA,ISA-DEFAULT %s
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
// RUN: -mamdgpu-fine-grained-mem \
// RUN: | FileCheck -check-prefixes=COMMON-ISA,ISA-ON %s
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
// RUN: -mno-amdgpu-fine-grained-mem \
// RUN: | FileCheck -check-prefixes=COMMON-ISA,ISA-OFF %s

#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable

typedef enum memory_order {
memory_order_relaxed = __ATOMIC_RELAXED,
memory_order_acquire = __ATOMIC_ACQUIRE,
memory_order_release = __ATOMIC_RELEASE,
memory_order_acq_rel = __ATOMIC_ACQ_REL,
memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;

typedef enum memory_scope {
memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;

// COMMON-ISA: kern:
// ISA-ON: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc
// ISA-OFF: flat_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc
// ISA-DEFAULT: flat_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc
kernel void kern(global atomic_int *x, int y, global int *z) {
*z = __opencl_atomic_fetch_xor(x, y, memory_order_seq_cst, memory_scope_work_group);
}

// COMMON: define{{.*}} amdgpu_kernel void @kern
// COMMON: atomicrmw xor ptr addrspace(1) %x, i32 %y syncscope("workgroup") seq_cst, align 4

// UNSAFE-INT-ON-SAME: !amdgpu.atomic ![[REF:[0-9]+]]
// UNSAFE-INT-ON: ![[REF]] = !{![[REF2:[0-9]+]]}
// UNSAFE-INT-ON: ![[REF2]] = !{!"fine_grained", i32 1}

// UNSAFE-INT-OFF-NOT: !amdgpu.atomic

// UNSAFE-INT-DEFAULT-NOT: !amdgpu.atomic
26 changes: 26 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15107,6 +15107,26 @@ bool unsafeFPAtomicsDisabled(Function *F) {
"true";
}

// Inspect the instruction's metadata to determine whether or not it can access
// fine-grained memory allocations. Some atomic instructions may fail on certain
// systems when accessing fine-grained memory.
//
// The "amdgpu.atomic" attachment is expected to be a tuple of {name, value}
// nodes, for example:
//   !0 = !{!1}
//   !1 = !{!"fine_grained", i32 1}
// Returns true only if an entry named "fine_grained" carries a non-zero
// integer value. Malformed entries (null operands, a name that is not a
// string, a value that is not an integer constant) are skipped instead of
// being dereferenced, since metadata can be written by hand or rewritten by
// other passes.
static bool canAccessFineGrainedMem(AtomicRMWInst &RMW) {
  const MDNode *MD = RMW.getMetadata("amdgpu.atomic");
  if (!MD)
    return false;

  for (const MDOperand &Op : MD->operands()) {
    // Operands may be null or non-node metadata; ignore those.
    const auto *Entry = dyn_cast_or_null<MDNode>(Op.get());
    if (!Entry || Entry->getNumOperands() < 2)
      continue;

    // The first operand must be the string key; anything else is not ours.
    const auto *Name = dyn_cast_or_null<MDString>(Entry->getOperand(0).get());
    if (!Name || Name->getString() != "fine_grained")
      continue;

    // The second operand must be an integer constant; a non-zero value
    // enables the property.
    const auto *Val =
        mdconst::dyn_extract_or_null<ConstantInt>(Entry->getOperand(1));
    if (Val && !Val->isZero())
      return true;
  }
  return false;
}

TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
unsigned AS = RMW->getPointerAddressSpace();
Expand Down Expand Up @@ -15229,6 +15249,12 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
}
break;
}
case AtomicRMWInst::Xor: {
if (AMDGPU::isFlatGlobalAddrSpace(AS) && canAccessFineGrainedMem(*RMW)) {
return AtomicExpansionKind::CmpXChg;
}
break;
}
default:
break;
}
Expand Down
24 changes: 20 additions & 4 deletions llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1100 %s
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX90A %s
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX940 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1100 %s

define float @syncscope_system(ptr %addr, float %val) #0 {
; GFX908-LABEL: syncscope_system:
Expand Down Expand Up @@ -391,4 +391,20 @@ define float @no_unsafe(ptr %addr, float %val) {
ret float %res
}

define i32 @default_xor(ptr %addr, i32 %val) {
; GCN-LABEL: default_xor:
; GCN: flat_atomic_xor
%res = atomicrmw xor ptr %addr, i32 %val seq_cst
ret i32 %res
}

define i32 @no_unsafe_xor(ptr %addr, i32 %val) {
; GCN-LABEL: no_unsafe_xor:
; GCN: flat_atomic_cmpswap
%res = atomicrmw xor ptr %addr, i32 %val seq_cst, !amdgpu.atomic !0
ret i32 %res
}

attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" }
!0 = !{!1}
!1 = !{!"fine_grained", i32 1}