Skip to content

Commit b31b4f5

Browse files
committed
Use copyMetadataForAtomic
1 parent 33fac59 commit b31b4f5

10 files changed

+425
-423
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 35 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,39 @@ static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
194194
return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
195195
}
196196

197+
/// Copy metadata that's safe to preserve when widening atomics.
///
/// Reads every metadata attachment on \p Source and re-attaches to \p Dest
/// only the kinds named in the switch below, plus the two AMDGPU kinds that
/// are matched by name in the default case. Attachments of any other kind
/// are not set on \p Dest.
///
/// \param Dest   Instruction that receives the selected attachments.
/// \param Source Instruction whose attachments are inspected (taken by const
///               reference).
198+
static void copyMetadataForAtomic(Instruction &Dest,
199+
const Instruction &Source) {
200+
// Gather all (kind ID, node) pairs currently attached to Source.
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
201+
Source.getAllMetadata(MD);
202+
LLVMContext &Ctx = Dest.getContext();
203+
// NOTE(review): MDB is not referenced again in this function as shown —
// confirm whether it is still needed.
MDBuilder MDB(Ctx);
204+
205+
for (auto [ID, N] : MD) {
206+
switch (ID) {
207+
// Kinds with an LLVMContext::MD_* enumerator that are forwarded with the
// same node.
case LLVMContext::MD_dbg:
208+
case LLVMContext::MD_tbaa:
209+
case LLVMContext::MD_tbaa_struct:
210+
case LLVMContext::MD_alias_scope:
211+
case LLVMContext::MD_noalias:
212+
case LLVMContext::MD_noalias_addrspace:
213+
case LLVMContext::MD_access_group:
214+
case LLVMContext::MD_mmra:
215+
Dest.setMetadata(ID, N);
216+
break;
217+
default:
218+
// These two kinds are recognized by comparing ID against the kind ID the
// context returns for their name.
// NOTE(review): the lookups run on every iteration that reaches this
// branch; they look loop-invariant — confirm before hoisting.
if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
219+
Dest.setMetadata(ID, N);
220+
else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
221+
Dest.setMetadata(ID, N);
222+
223+
// Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
224+
// uses.
225+
// Any kind that matched neither name above falls through without being
// copied.
break;
226+
}
227+
}
228+
}
229+
197230
// Determine if a particular atomic operation has a supported size,
198231
// and is of appropriate alignment, to be passed through for target
199232
// lowering. (Versus turning into a __atomic libcall)
@@ -617,7 +650,7 @@ static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
617650
Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
618651
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
619652
if (MetadataSrc)
620-
Pair->copyMetadata(*MetadataSrc);
653+
copyMetadataForAtomic(*Pair, *MetadataSrc);
621654

622655
Success = Builder.CreateExtractValue(Pair, 1, "success");
623656
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
@@ -970,37 +1003,6 @@ void AtomicExpandImpl::expandPartwordAtomicRMW(
9701003
AI->eraseFromParent();
9711004
}
9721005

973-
/// Copy metadata that's safe to preserve when widening atomics.
///
/// Reads every metadata attachment on \p Source and re-attaches to \p Dest
/// only the kinds named in the switch below, plus the two AMDGPU kinds that
/// are matched by name in the default case. Attachments of any other kind
/// are not set on \p Dest.
///
/// \param Dest   Instruction that receives the selected attachments.
/// \param Source Instruction whose attachments are inspected (taken by const
///               reference).
974-
static void copyMetadataForAtomic(Instruction &Dest,
975-
const Instruction &Source) {
976-
// Gather all (kind ID, node) pairs currently attached to Source.
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
977-
Source.getAllMetadata(MD);
978-
LLVMContext &Ctx = Dest.getContext();
979-
// NOTE(review): MDB is not referenced again in this function as shown —
// confirm whether it is still needed.
MDBuilder MDB(Ctx);
980-
981-
for (auto [ID, N] : MD) {
982-
switch (ID) {
983-
// Kinds with an LLVMContext::MD_* enumerator that are forwarded with the
// same node.
case LLVMContext::MD_dbg:
984-
case LLVMContext::MD_tbaa:
985-
case LLVMContext::MD_tbaa_struct:
986-
case LLVMContext::MD_alias_scope:
987-
case LLVMContext::MD_noalias:
988-
case LLVMContext::MD_noalias_addrspace:
989-
case LLVMContext::MD_access_group:
990-
case LLVMContext::MD_mmra:
991-
Dest.setMetadata(ID, N);
992-
break;
993-
default:
994-
// These two kinds are recognized by comparing ID against the kind ID the
// context returns for their name; any other kind is not copied.
if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
995-
Dest.setMetadata(ID, N);
996-
else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
997-
Dest.setMetadata(ID, N);
998-
999-
break;
1000-
}
1001-
}
1002-
}
1003-
10041006
// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
10051007
AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
10061008
ReplacementIRBuilder Builder(AI, *DL);
@@ -1850,7 +1852,7 @@ void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
18501852
Addr, Loaded, NewVal, Alignment, MemOpOrder,
18511853
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
18521854
if (MetadataSrc)
1853-
Pair->copyMetadata(*MetadataSrc);
1855+
copyMetadataForAtomic(*Pair, *MetadataSrc);
18541856

18551857
Success = Builder.CreateExtractValue(Pair, 1, "success");
18561858
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-agent.ll

Lines changed: 104 additions & 104 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll

Lines changed: 82 additions & 82 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define float @syncscope_system(ptr %addr, float %val) {
1313
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
1414
; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1515
; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
16-
; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.ignore.denormal.mode [[META0]]
16+
; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
1717
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1818
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1919
; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
@@ -83,7 +83,7 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) {
8383
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
8484
; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
8585
; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
86-
; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("workgroup") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
86+
; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("workgroup") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
8787
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
8888
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
8989
; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
@@ -303,7 +303,7 @@ define float @flat_atomicrmw_fadd_f32__align32(ptr %addr, float %val) {
303303
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
304304
; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
305305
; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
306-
; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
306+
; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]]
307307
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
308308
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
309309
; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float

0 commit comments

Comments
 (0)