Skip to content

Commit cd33e84

Browse files
committed
[AMDGPU] Add an option to disable unsafe uses of atomic xor
1 parent 9d27139 commit cd33e84

File tree

10 files changed

+154
-4
lines changed

10 files changed

+154
-4
lines changed

clang/include/clang/Basic/TargetInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,9 @@ class TargetInfo : public TransferrableTargetInfo,
269269
LLVM_PREFERRED_TYPE(bool)
270270
unsigned ARMCDECoprocMask : 8;
271271

272+
LLVM_PREFERRED_TYPE(bool)
273+
unsigned AllowAMDGPUFineGrainedMem : 1;
274+
272275
unsigned MaxOpenCLWorkGroupSize;
273276

274277
std::optional<unsigned> MaxBitIntWidth;
@@ -1009,6 +1012,9 @@ class TargetInfo : public TransferrableTargetInfo,
10091012
/// allowed.
10101013
bool allowAMDGPUUnsafeFPAtomics() const { return AllowAMDGPUUnsafeFPAtomics; }
10111014

1015+
/// Returns whether or not fine-grained memory access is allowed on AMDGPU.
1016+
bool allowAMDGPUFineGrainedMem() const { return AllowAMDGPUFineGrainedMem; }
1017+
10121018
/// For ARM targets returns a mask defining which coprocessors are configured
10131019
/// as Custom Datapath.
10141020
uint32_t getARMCDECoprocMask() const { return ARMCDECoprocMask; }

clang/include/clang/Basic/TargetOptions.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ class TargetOptions {
7878
/// \brief If enabled, allow AMDGPU unsafe floating point atomics.
7979
bool AllowAMDGPUUnsafeFPAtomics = false;
8080

81+
/// \brief If enabled, allow fine-grained memory access on AMDGPU.
82+
bool AllowAMDGPUFineGrainedMem = false;
83+
8184
/// \brief Code object version for AMDGPU.
8285
llvm::CodeObjectVersionKind CodeObjectVersion =
8386
llvm::CodeObjectVersionKind::COV_None;

clang/include/clang/Driver/Options.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4749,6 +4749,16 @@ defm unsafe_fp_atomics : BoolOption<"m", "unsafe-fp-atomics",
47494749

47504750
def faltivec : Flag<["-"], "faltivec">, Group<f_Group>;
47514751
def fno_altivec : Flag<["-"], "fno-altivec">, Group<f_Group>;
4752+
4753+
defm amdgpu_fine_grained_mem : BoolOption<"m", "amdgpu-fine-grained-mem",
4754+
TargetOpts<"AllowAMDGPUFineGrainedMem">, DefaultFalse,
4755+
PosFlag<SetTrue, [], [ClangOption, CC1Option],
4756+
"Indicates that fine-grained memory allocations may be accessed in the "
4757+
"kernel. This may result in certain atomic operations being replaced "
4758+
"in order to guarantee correct operation when fine-grained memory "
4759+
"allocations are used. (AMDGPU only)">,
4760+
NegFlag<SetFalse, [], [ClangOption, CC1Option]>>, Group<m_Group>;
4761+
47524762
let Flags = [TargetSpecific] in {
47534763
def maltivec : Flag<["-"], "maltivec">, Group<m_ppc_Features_Group>,
47544764
HelpText<"Enable AltiVec vector initializer syntax">;

clang/lib/Basic/TargetInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
157157
HasAArch64SVETypes = false;
158158
HasRISCVVTypes = false;
159159
AllowAMDGPUUnsafeFPAtomics = false;
160+
AllowAMDGPUFineGrainedMem = false;
160161
ARMCDECoprocMask = 0;
161162

162163
// Default to no types using fpret.

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
232232
HasFloat16 = true;
233233
WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
234234
AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
235+
AllowAMDGPUFineGrainedMem = Opts.AllowAMDGPUFineGrainedMem;
235236

236237
// Set pointer width and alignment for the generic address space.
237238
PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,26 @@ void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
392392
}
393393
}
394394

395+
// Add metadata to specific atomic instructions, to mark them as potentially
396+
// accessing fine-grained memory locations.
397+
static void addFineGrainedAtomicMD(llvm::Function *F) {
398+
llvm::LLVMContext &Ctx = F->getContext();
399+
llvm::MDBuilder MDHelper(Ctx);
400+
auto *Int32Ty = llvm::IntegerType::getInt32Ty(Ctx);
401+
for (llvm::BasicBlock &BB : *F) {
402+
for (llvm::Instruction &I : BB) {
403+
llvm::AtomicRMWInst *ARI = llvm::dyn_cast<llvm::AtomicRMWInst>(&I);
404+
if (!ARI || ARI->getOperation() != llvm::AtomicRMWInst::Xor)
405+
continue;
406+
auto *Key = MDHelper.createString("fine_grained");
407+
auto *One = MDHelper.createConstant(llvm::ConstantInt::get(Int32Ty, 1));
408+
llvm::MDNode *MD = llvm::MDNode::get(Ctx, {Key, One});
409+
auto *Tuple = llvm::MDNode::get(Ctx, {MD});
410+
ARI->setMetadata("amdgpu.atomic", Tuple);
411+
}
412+
}
413+
}
414+
395415
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
396416
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
397417
if (requiresAMDGPUProtectedVisibility(D, GV)) {
@@ -413,6 +433,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
413433
if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
414434
F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
415435

436+
if (M.getContext().getTargetInfo().allowAMDGPUFineGrainedMem())
437+
addFineGrainedAtomicMD(F);
438+
416439
if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
417440
F->addFnAttr("amdgpu-ieee", "false");
418441
}

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7346,6 +7346,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
73467346

73477347
Args.addOptInFlag(CmdArgs, options::OPT_munsafe_fp_atomics,
73487348
options::OPT_mno_unsafe_fp_atomics);
7349+
Args.addOptInFlag(CmdArgs, options::OPT_mamdgpu_fine_grained_mem,
7350+
options::OPT_mno_amdgpu_fine_grained_mem);
73497351
Args.addOptOutFlag(CmdArgs, options::OPT_mamdgpu_ieee,
73507352
options::OPT_mno_amdgpu_ieee);
73517353
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// REQUIRES: amdgpu-registered-target
2+
//
3+
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
4+
// RUN: -emit-llvm \
5+
// RUN: | FileCheck -check-prefixes=COMMON,UNSAFE-INT-DEFAULT %s
6+
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
7+
// RUN: -emit-llvm -mamdgpu-fine-grained-mem \
8+
// RUN: | FileCheck -check-prefixes=COMMON,UNSAFE-INT-ON %s
9+
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
10+
// RUN: -emit-llvm -mno-amdgpu-fine-grained-mem \
11+
// RUN: | FileCheck -check-prefixes=COMMON,UNSAFE-INT-OFF %s
12+
13+
// Check AMDGCN ISA generation.
14+
15+
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
16+
// RUN: | FileCheck -check-prefixes=COMMON-ISA,ISA-DEFAULT %s
17+
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
18+
// RUN: -mamdgpu-fine-grained-mem \
19+
// RUN: | FileCheck -check-prefixes=COMMON-ISA,ISA-ON %s
20+
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
21+
// RUN: -mno-amdgpu-fine-grained-mem \
22+
// RUN: | FileCheck -check-prefixes=COMMON-ISA,ISA-OFF %s
23+
24+
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
25+
#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
26+
27+
typedef enum memory_order {
28+
memory_order_relaxed = __ATOMIC_RELAXED,
29+
memory_order_acquire = __ATOMIC_ACQUIRE,
30+
memory_order_release = __ATOMIC_RELEASE,
31+
memory_order_acq_rel = __ATOMIC_ACQ_REL,
32+
memory_order_seq_cst = __ATOMIC_SEQ_CST
33+
} memory_order;
34+
35+
typedef enum memory_scope {
36+
memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
37+
memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
38+
memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
39+
memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
40+
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
41+
memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
42+
#endif
43+
} memory_scope;
44+
45+
// COMMON-ISA: kern:
46+
// ISA-ON: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc
47+
// ISA-OFF: flat_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc
48+
// ISA-DEFAULT: flat_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc
49+
kernel void kern(global atomic_int *x, int y, global int *z) {
50+
*z = __opencl_atomic_fetch_xor(x, y, memory_order_seq_cst, memory_scope_work_group);
51+
}
52+
53+
// COMMON: define{{.*}} amdgpu_kernel void @kern
54+
// COMMON: atomicrmw xor ptr addrspace(1) %x, i32 %y syncscope("workgroup") seq_cst, align 4
55+
56+
// UNSAFE-INT-ON-SAME: !amdgpu.atomic ![[REF:[0-9]+]]
57+
// UNSAFE-INT-ON: ![[REF]] = !{![[REF2:[0-9]+]]}
58+
// UNSAFE-INT-ON: ![[REF2]] = !{!"fine_grained", i32 1}
59+
60+
// UNSAFE-INT-OFF-NOT: !amdgpu.atomic
61+
62+
// UNSAFE-INT-DEFAULT-NOT: !amdgpu.atomic

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15107,6 +15107,26 @@ bool unsafeFPAtomicsDisabled(Function *F) {
1510715107
"true";
1510815108
}
1510915109

15110+
// Inspect the instruction's metadata to determine whether or not it can access
15111+
// fine-grained memory allocations. Some atomic instructions may fail on certain
15112+
// systems when accessing fine-grained memory.
15113+
static bool canAccessFineGrainedMem(AtomicRMWInst &RMW) {
15114+
if (MDNode *MD = RMW.getMetadata("amdgpu.atomic")) {
15115+
for (const MDOperand &Op : MD->operands()) {
15116+
MDNode *OpMD = dyn_cast<MDNode>(&*Op);
15117+
if (!OpMD || OpMD->getNumOperands() < 2)
15118+
continue;
15119+
const MDString *NameOp = dyn_cast<MDString>(OpMD->getOperand(0));
15120+
const MDOperand &ValOp = OpMD->getOperand(1);
15121+
if (NameOp->getString().equals("fine_grained") &&
15122+
mdconst::extract<ConstantInt>(ValOp)->getZExtValue() != 0) {
15123+
return true;
15124+
}
15125+
}
15126+
}
15127+
return false;
15128+
}
15129+
1511015130
TargetLowering::AtomicExpansionKind
1511115131
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1511215132
unsigned AS = RMW->getPointerAddressSpace();
@@ -15229,6 +15249,12 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1522915249
}
1523015250
break;
1523115251
}
15252+
case AtomicRMWInst::Xor: {
15253+
if (AMDGPU::isFlatGlobalAddrSpace(AS) && canAccessFineGrainedMem(*RMW)) {
15254+
return AtomicExpansionKind::CmpXChg;
15255+
}
15256+
break;
15257+
}
1523215258
default:
1523315259
break;
1523415260
}

llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s
3-
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GFX90A %s
4-
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
5-
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1100 %s
2+
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s
3+
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX90A %s
4+
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX940 %s
5+
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1100 %s
66

77
define float @syncscope_system(ptr %addr, float %val) #0 {
88
; GFX908-LABEL: syncscope_system:
@@ -391,4 +391,20 @@ define float @no_unsafe(ptr %addr, float %val) {
391391
ret float %res
392392
}
393393

394+
define i32 @default_xor(ptr %addr, i32 %val) {
395+
; GCN-LABEL: default_xor:
396+
; GCN: flat_atomic_xor
397+
%res = atomicrmw xor ptr %addr, i32 %val seq_cst
398+
ret i32 %res
399+
}
400+
401+
define i32 @no_unsafe_xor(ptr %addr, i32 %val) {
402+
; GCN-LABEL: no_unsafe_xor:
403+
; GCN: flat_atomic_cmpswap
404+
%res = atomicrmw xor ptr %addr, i32 %val seq_cst, !amdgpu.atomic !0
405+
ret i32 %res
406+
}
407+
394408
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" }
409+
!0 = !{!1}
410+
!1 = !{!"fine_grained", i32 1}

0 commit comments

Comments
 (0)