Skip to content

Commit a8e1311

Browse files
authored
[RFC] IR: Define noalias.addrspace metadata (#102461)
This is intended to solve a problem with lowering atomics in OpenMP and C++ common to AMDGPU and NVPTX. In OpenCL and CUDA, it is undefined behavior for an atomic instruction to modify an object in thread private memory. In OpenMP, it is defined. Correspondingly, the hardware does not handle this correctly. For AMDGPU, 32-bit atomics work and 64-bit atomics are silently dropped. We therefore need to codegen this by inserting a runtime address space check, performing the private case without atomics, and falling back to issuing the real atomic otherwise. This metadata allows us to avoid this extra check and branch. Handle this by introducing metadata intended to be applied to atomicrmw instructions, indicating that they cannot access the forbidden address space.
1 parent 90a5744 commit a8e1311

File tree

8 files changed

+337
-10
lines changed

8 files changed

+337
-10
lines changed

llvm/docs/LangRef.rst

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8047,6 +8047,43 @@ it will contain a list of ids, including the ids of the callsites in the
80478047
full inline sequence, in order from the leaf-most call's id to the outermost
80488048
inlined call.
80498049

8050+
8051+
'``noalias.addrspace``' Metadata
8052+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
8053+
8054+
The ``noalias.addrspace`` metadata is used to identify memory
8055+
operations which cannot access objects allocated in a range of address
8056+
spaces. It is attached to memory instructions, including
8057+
:ref:`atomicrmw <i_atomicrmw>`, :ref:`cmpxchg <i_cmpxchg>`, and
8058+
:ref:`call <i_call>` instructions.
8059+
8060+
This follows the same form as :ref:`range metadata <range-metadata>`,
8061+
except the field entries must be of type ``i32``. The values are
8062+
interpreted as the same numeric address spaces that apply to IR values.
8063+
8064+
Example:
8065+
8066+
.. code-block:: llvm
8067+
8068+
; %ptr cannot point to an object allocated in addrspace(5)
8069+
%rmw.valid = atomicrmw and ptr %ptr, i64 %value seq_cst, !noalias.addrspace !0
8070+
8071+
; Undefined behavior. The underlying object is allocated in one of the listed
8072+
; address spaces.
8073+
%alloca = alloca i64, addrspace(5)
8074+
%alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
8075+
%rmw.ub = atomicrmw and ptr %alloca.cast, i64 %value seq_cst, !noalias.addrspace !0
8076+
8077+
!0 = !{i32 5, i32 6} ; Exclude addrspace(5) only
8078+
8079+
8080+
This is intended for use on targets with a notion of generic address
8081+
spaces, which at runtime resolve to different physical memory
8082+
spaces. The interpretation of the address space values is target
8083+
specific. The behavior is undefined if the runtime memory address does
8084+
resolve to an object defined in one of the indicated address spaces.
8085+
8086+
80508087
Module Flags Metadata
80518088
=====================
80528089

llvm/docs/ReleaseNotes.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ Changes to the LLVM IR
6262

6363
* Added `usub_cond` and `usub_sat` operations to `atomicrmw`.
6464

65+
* Introduced `noalias.addrspace` metadata.
66+
6567
* Remove the following intrinsics which can be replaced with a `bitcast`:
6668

6769
* `llvm.nvvm.bitcast.f2i`

llvm/include/llvm/IR/FixedMetadataKinds.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,4 @@ LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37)
5252
LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38)
5353
LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39)
5454
LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40)
55+
LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41)

llvm/lib/IR/Verifier.cpp

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,14 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
492492
/// Whether a metadata node is allowed to be, or contain, a DILocation.
493493
enum class AreDebugLocsAllowed { No, Yes };
494494

495+
/// Metadata that should be treated as a range, with slightly different
496+
/// requirements.
497+
enum class RangeLikeMetadataKind {
498+
Range, // MD_range
499+
AbsoluteSymbol, // MD_absolute_symbol
500+
NoaliasAddrspace // MD_noalias_addrspace
501+
};
502+
495503
// Verification methods...
496504
void visitGlobalValue(const GlobalValue &GV);
497505
void visitGlobalVariable(const GlobalVariable &GV);
@@ -515,9 +523,10 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
515523
void visitModuleFlagCGProfileEntry(const MDOperand &MDO);
516524
void visitFunction(const Function &F);
517525
void visitBasicBlock(BasicBlock &BB);
518-
void verifyRangeMetadata(const Value &V, const MDNode *Range, Type *Ty,
519-
bool IsAbsoluteSymbol);
526+
void verifyRangeLikeMetadata(const Value &V, const MDNode *Range, Type *Ty,
527+
RangeLikeMetadataKind Kind);
520528
void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty);
529+
void visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range, Type *Ty);
521530
void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
522531
void visitProfMetadata(Instruction &I, MDNode *MD);
523532
void visitCallStackMetadata(MDNode *MD);
@@ -760,8 +769,9 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
760769
// FIXME: Why is getMetadata on GlobalValue protected?
761770
if (const MDNode *AbsoluteSymbol =
762771
GO->getMetadata(LLVMContext::MD_absolute_symbol)) {
763-
verifyRangeMetadata(*GO, AbsoluteSymbol, DL.getIntPtrType(GO->getType()),
764-
true);
772+
verifyRangeLikeMetadata(*GO, AbsoluteSymbol,
773+
DL.getIntPtrType(GO->getType()),
774+
RangeLikeMetadataKind::AbsoluteSymbol);
765775
}
766776
}
767777

@@ -4136,8 +4146,8 @@ static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
41364146

41374147
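/// Verify !range, !absolute_symbol and !noalias.addrspace metadata. These have
41384148
/// the same restrictions, except !absolute_symbol allows the full set and
/// !noalias.addrspace entries must be i32 rather than the value's scalar type.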
4139-
void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
4140-
Type *Ty, bool IsAbsoluteSymbol) {
4149+
void Verifier::verifyRangeLikeMetadata(const Value &I, const MDNode *Range,
4150+
Type *Ty, RangeLikeMetadataKind Kind) {
41414151
unsigned NumOperands = Range->getNumOperands();
41424152
Check(NumOperands % 2 == 0, "Unfinished range!", Range);
41434153
unsigned NumRanges = NumOperands / 2;
@@ -4154,8 +4164,14 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
41544164

41554165
Check(High->getType() == Low->getType(), "Range pair types must match!",
41564166
&I);
4157-
Check(High->getType() == Ty->getScalarType(),
4158-
"Range types must match instruction type!", &I);
4167+
4168+
if (Kind == RangeLikeMetadataKind::NoaliasAddrspace) {
4169+
Check(High->getType()->isIntegerTy(32),
4170+
"noalias.addrspace type must be i32!", &I);
4171+
} else {
4172+
Check(High->getType() == Ty->getScalarType(),
4173+
"Range types must match instruction type!", &I);
4174+
}
41594175

41604176
APInt HighV = High->getValue();
41614177
APInt LowV = Low->getValue();
@@ -4166,7 +4182,9 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
41664182
"The upper and lower limits cannot be the same value", &I);
41674183

41684184
ConstantRange CurRange(LowV, HighV);
4169-
Check(!CurRange.isEmptySet() && (IsAbsoluteSymbol || !CurRange.isFullSet()),
4185+
Check(!CurRange.isEmptySet() &&
4186+
(Kind == RangeLikeMetadataKind::AbsoluteSymbol ||
4187+
!CurRange.isFullSet()),
41704188
"Range must not be empty!", Range);
41714189
if (i != 0) {
41724190
Check(CurRange.intersectWith(LastRange).isEmptySet(),
@@ -4194,7 +4212,15 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
41944212
void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
41954213
assert(Range && Range == I.getMetadata(LLVMContext::MD_range) &&
41964214
"precondition violation");
4197-
verifyRangeMetadata(I, Range, Ty, false);
4215+
verifyRangeLikeMetadata(I, Range, Ty, RangeLikeMetadataKind::Range);
4216+
}
4217+
4218+
void Verifier::visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range,
4219+
Type *Ty) {
4220+
assert(Range && Range == I.getMetadata(LLVMContext::MD_noalias_addrspace) &&
4221+
"precondition violation");
4222+
verifyRangeLikeMetadata(I, Range, Ty,
4223+
RangeLikeMetadataKind::NoaliasAddrspace);
41984224
}
41994225

42004226
void Verifier::checkAtomicMemAccessSize(Type *Ty, const Instruction *I) {
@@ -5187,6 +5213,13 @@ void Verifier::visitInstruction(Instruction &I) {
51875213
visitRangeMetadata(I, Range, I.getType());
51885214
}
51895215

5216+
if (MDNode *Range = I.getMetadata(LLVMContext::MD_noalias_addrspace)) {
5217+
Check(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicRMWInst>(I) ||
5218+
isa<AtomicCmpXchgInst>(I) || isa<CallInst>(I),
5219+
"noalias.addrspace are only for memory operations!", &I);
5220+
visitNoaliasAddrspaceMetadata(I, Range, I.getType());
5221+
}
5222+
51905223
if (I.hasMetadata(LLVMContext::MD_invariant_group)) {
51915224
Check(isa<LoadInst>(I) || isa<StoreInst>(I),
51925225
"invariant.group metadata is only for loads and stores", &I);
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
2+
3+
define i64 @atomicrmw_noalias_addrspace__0_1(ptr %ptr, i64 %val) {
4+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_1(
5+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
6+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META0:![0-9]+]]
7+
; CHECK-NEXT: ret i64 [[RET]]
8+
;
9+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !0
10+
ret i64 %ret
11+
}
12+
13+
define i64 @atomicrmw_noalias_addrspace__0_2(ptr %ptr, i64 %val) {
14+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_2(
15+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
16+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META1:![0-9]+]]
17+
; CHECK-NEXT: ret i64 [[RET]]
18+
;
19+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !1
20+
ret i64 %ret
21+
}
22+
23+
define i64 @atomicrmw_noalias_addrspace__1_3(ptr %ptr, i64 %val) {
24+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__1_3(
25+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
26+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META2:![0-9]+]]
27+
; CHECK-NEXT: ret i64 [[RET]]
28+
;
29+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !2
30+
ret i64 %ret
31+
}
32+
33+
define i64 @atomicrmw_noalias_addrspace__multiple_ranges(ptr %ptr, i64 %val) {
34+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__multiple_ranges(
35+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
36+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META3:![0-9]+]]
37+
; CHECK-NEXT: ret i64 [[RET]]
38+
;
39+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3
40+
ret i64 %ret
41+
}
42+
43+
define i64 @load_noalias_addrspace__5_6(ptr %ptr) {
44+
; CHECK-LABEL: define i64 @load_noalias_addrspace__5_6(
45+
; CHECK-SAME: ptr [[PTR:%.*]]) {
46+
; CHECK-NEXT: [[RET:%.*]] = load i64, ptr [[PTR]], align 4, !noalias.addrspace [[META4:![0-9]+]]
47+
; CHECK-NEXT: ret i64 [[RET]]
48+
;
49+
%ret = load i64, ptr %ptr, align 4, !noalias.addrspace !4
50+
ret i64 %ret
51+
}
52+
53+
define void @store_noalias_addrspace__5_6(ptr %ptr, i64 %val) {
54+
; CHECK-LABEL: define void @store_noalias_addrspace__5_6(
55+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
56+
; CHECK-NEXT: store i64 [[VAL]], ptr [[PTR]], align 4, !noalias.addrspace [[META4]]
57+
; CHECK-NEXT: ret void
58+
;
59+
store i64 %val, ptr %ptr, align 4, !noalias.addrspace !4
60+
ret void
61+
}
62+
63+
define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(ptr %ptr, i64 %val0, i64 %val1) {
64+
; CHECK-LABEL: define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(
65+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL0:%.*]], i64 [[VAL1:%.*]]) {
66+
; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL0]], i64 [[VAL1]] monotonic monotonic, align 8, !noalias.addrspace [[META4]]
67+
; CHECK-NEXT: ret { i64, i1 } [[RET]]
68+
;
69+
%ret = cmpxchg ptr %ptr, i64 %val0, i64 %val1 monotonic monotonic, align 8, !noalias.addrspace !4
70+
ret { i64, i1 } %ret
71+
}
72+
73+
declare void @foo()
74+
75+
define void @call_noalias_addrspace__5_6(ptr %ptr) {
76+
; CHECK-LABEL: define void @call_noalias_addrspace__5_6(
77+
; CHECK-SAME: ptr [[PTR:%.*]]) {
78+
; CHECK-NEXT: call void @foo(), !noalias.addrspace [[META4]]
79+
; CHECK-NEXT: ret void
80+
;
81+
call void @foo(), !noalias.addrspace !4
82+
ret void
83+
}
84+
85+
define void @call_memcpy_intrinsic_addrspace__5_6(ptr %dst, ptr %src, i64 %size) {
86+
; CHECK-LABEL: define void @call_memcpy_intrinsic_addrspace__5_6(
87+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
88+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false), !noalias.addrspace [[META4]]
89+
; CHECK-NEXT: ret void
90+
;
91+
call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false), !noalias.addrspace !4
92+
ret void
93+
}
94+
95+
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
96+
97+
attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
98+
99+
!0 = !{i32 0, i32 1}
100+
!1 = !{i32 0, i32 2}
101+
!2 = !{i32 1, i32 3}
102+
!3 = !{i32 4, i32 6, i32 10, i32 55}
103+
!4 = !{i32 5, i32 6}
104+
;.
105+
; CHECK: [[META0]] = !{i32 0, i32 1}
106+
; CHECK: [[META1]] = !{i32 0, i32 2}
107+
; CHECK: [[META2]] = !{i32 1, i32 3}
108+
; CHECK: [[META3]] = !{i32 4, i32 6, i32 10, i32 55}
109+
; CHECK: [[META4]] = !{i32 5, i32 6}
110+
;.

llvm/test/Transforms/InstCombine/loadstore-metadata.ll

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,19 @@ define i32 @test_load_cast_combine_noundef(ptr %ptr) {
173173
ret i32 %c
174174
}
175175

176+
define i32 @test_load_cast_combine_noalias_addrspace(ptr %ptr) {
177+
; Check (cast (load (...))) -> (load (cast (...))) on a load carrying !noalias.addrspace.
178+
; CHECK-LABEL: @test_load_cast_combine_noalias_addrspace(
179+
; CHECK-NEXT: entry:
180+
; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
181+
; CHECK-NEXT: ret i32 [[L1]]
182+
;
183+
entry:
184+
%l = load float, ptr %ptr, align 4, !noalias.addrspace !11
185+
%c = bitcast float %l to i32
186+
ret i32 %c
187+
}
188+
176189
!0 = !{!1, !1, i64 0}
177190
!1 = !{!"scalar type", !2}
178191
!2 = !{!"root"}
@@ -184,3 +197,4 @@ define i32 @test_load_cast_combine_noundef(ptr %ptr) {
184197
!8 = !{i32 1}
185198
!9 = !{i64 8}
186199
!10 = distinct !{}
200+
!11 = !{i32 5, i32 6}

llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,10 +316,80 @@ out:
316316
ret void
317317
}
318318

319+
define void @hoist_noalias_addrspace_both(i1 %c, ptr %p, i64 %val) {
320+
; CHECK-LABEL: @hoist_noalias_addrspace_both(
321+
; CHECK-NEXT: if:
322+
; CHECK-NEXT: [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8
323+
; CHECK-NEXT: ret void
324+
;
325+
if:
326+
br i1 %c, label %then, label %else
327+
328+
then:
329+
%t = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !4
330+
br label %out
331+
332+
else:
333+
%e = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !4
334+
br label %out
335+
336+
out:
337+
ret void
338+
}
339+
340+
define void @hoist_noalias_addrspace_one(i1 %c, ptr %p, i64 %val) {
341+
; CHECK-LABEL: @hoist_noalias_addrspace_one(
342+
; CHECK-NEXT: if:
343+
; CHECK-NEXT: [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8
344+
; CHECK-NEXT: ret void
345+
;
346+
if:
347+
br i1 %c, label %then, label %else
348+
349+
then:
350+
%t = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !4
351+
br label %out
352+
353+
else:
354+
%e = atomicrmw add ptr %p, i64 %val seq_cst
355+
br label %out
356+
357+
out:
358+
ret void
359+
}
360+
361+
define void @hoist_noalias_addrspace_switch(i64 %i, ptr %p, i64 %val) {
362+
; CHECK-LABEL: @hoist_noalias_addrspace_switch(
363+
; CHECK-NEXT: out:
364+
; CHECK-NEXT: [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8
365+
; CHECK-NEXT: ret void
366+
;
367+
switch i64 %i, label %bb0 [
368+
i64 1, label %bb1
369+
i64 2, label %bb2
370+
]
371+
bb0:
372+
%t = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !4
373+
br label %out
374+
bb1:
375+
%e = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !5
376+
br label %out
377+
bb2:
378+
%f = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !6
379+
br label %out
380+
out:
381+
ret void
382+
}
383+
384+
319385
!0 = !{ i8 0, i8 1 }
320386
!1 = !{ i8 3, i8 5 }
321387
!2 = !{}
322388
!3 = !{ i8 7, i8 9 }
389+
!4 = !{i32 5, i32 6}
390+
!5 = !{i32 5, i32 7}
391+
!6 = !{i32 4, i32 8}
392+
323393
;.
324394
; CHECK: [[RNG0]] = !{i8 0, i8 1, i8 3, i8 5}
325395
; CHECK: [[RNG1]] = !{i8 0, i8 1, i8 3, i8 5, i8 7, i8 9}

0 commit comments

Comments
 (0)