Skip to content

Commit e51102b

Browse files
committed
[RFC] IR: Define noalias.addrspace metadata
This is intended to solve a problem with lowering atomics in OpenMP and C++ common to AMDGPU and NVPTX. In OpenCL and CUDA, it is undefined behavior for an atomic instruction to modify an object in thread private memory. In OpenMP, it is defined. Correspondingly, the hardware does not handle this correctly. For AMDGPU, 32-bit atomics work and 64-bit atomics are silently dropped. We therefore need to codegen this by inserting a runtime address space check, performing the private case without atomics, and falling back to issuing the real atomic otherwise. This metadata allows us to avoid this extra check and branch. Handle this by introducing metadata intended to be applied to atomicrmw instructions, indicating that they cannot access the forbidden address space.
1 parent 67cb040 commit e51102b

File tree

6 files changed

+237
-6
lines changed

6 files changed

+237
-6
lines changed

llvm/docs/LangRef.rst

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8020,6 +8020,42 @@ it will contain a list of ids, including the ids of the callsites in the
80208020
full inline sequence, in order from the leaf-most call's id to the outermost
80218021
inlined call.
80228022

8023+
8024+
'``noalias.addrspace``' Metadata
8025+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
8026+
8027+
The ``noalias.addrspace`` metadata is used to identify memory
8028+
operations which cannot access a range of address spaces. It is
8029+
attached to memory instructions, including :ref:`atomicrmw
8030+
<i_atomicrmw>`, :ref:`cmpxchg <i_cmpxchg>`, and :ref:`call <i_call>`
8031+
instructions.
8032+
8033+
This follows the same form as :ref:`range metadata <range-metadata>`,
8034+
except the field entries must be of type ``i32``. The interpretation is
8035+
the same numeric address spaces as applied to IR values.
8036+
8037+
Example:
8038+
8039+
.. code-block:: llvm
8040+
; %ptr cannot point to an object allocated in addrspace(5)
8041+
%rmw.valid = atomicrmw and ptr %ptr, i64 %value seq_cst, !noalias.addrspace !0
8042+
8043+
; Undefined behavior. The underlying object is allocated in one of the listed
8044+
; address spaces.
8045+
%alloca = alloca i64, addrspace(5)
8046+
%alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
8047+
%rmw.ub = atomicrmw and ptr %alloca.cast, i64 %value seq_cst, !noalias.addrspace !0
8048+
8049+
!0 = !{i32 5, i32 6}
8050+
8051+
8052+
This is intended for use on targets with a notion of generic address
8053+
spaces, which at runtime resolve to different physical memory
8054+
spaces. The interpretation of the address space values is target
8055+
specific. The behavior is undefined if the runtime memory address does
8056+
resolve to an object defined in one of the indicated address spaces.
8057+
8058+
80238059
Module Flags Metadata
80248060
=====================
80258061

llvm/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ Changes to the LLVM IR
5353
* The ``x86_mmx`` IR type has been removed. It will be translated to
5454
the standard vector type ``<1 x i64>`` in bitcode upgrade.
5555

56+
* Introduced ``noalias.addrspace`` metadata.
57+
5658
Changes to LLVM infrastructure
5759
------------------------------
5860

llvm/include/llvm/IR/FixedMetadataKinds.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,4 @@ LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37)
5252
LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38)
5353
LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39)
5454
LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40)
55+
LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41)

llvm/lib/IR/Verifier.cpp

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -515,8 +515,9 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
515515
void visitFunction(const Function &F);
516516
void visitBasicBlock(BasicBlock &BB);
517517
void verifyRangeMetadata(const Value &V, const MDNode *Range, Type *Ty,
518-
bool IsAbsoluteSymbol);
518+
bool IsAbsoluteSymbol, bool IsAddrSpaceRange);
519519
void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty);
520+
void visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range, Type *Ty);
520521
void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
521522
void visitProfMetadata(Instruction &I, MDNode *MD);
522523
void visitCallStackMetadata(MDNode *MD);
@@ -760,7 +761,7 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
760761
if (const MDNode *AbsoluteSymbol =
761762
GO->getMetadata(LLVMContext::MD_absolute_symbol)) {
762763
verifyRangeMetadata(*GO, AbsoluteSymbol, DL.getIntPtrType(GO->getType()),
763-
true);
764+
true, false);
764765
}
765766
}
766767

@@ -4128,7 +4129,8 @@ static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
41284129
/// Verify !range and !absolute_symbol metadata. These have the same
41294130
/// restrictions, except !absolute_symbol allows the full set.
41304131
void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
4131-
Type *Ty, bool IsAbsoluteSymbol) {
4132+
Type *Ty, bool IsAbsoluteSymbol,
4133+
bool IsAddrSpaceRange) {
41324134
unsigned NumOperands = Range->getNumOperands();
41334135
Check(NumOperands % 2 == 0, "Unfinished range!", Range);
41344136
unsigned NumRanges = NumOperands / 2;
@@ -4145,8 +4147,14 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
41454147

41464148
Check(High->getType() == Low->getType(), "Range pair types must match!",
41474149
&I);
4148-
Check(High->getType() == Ty->getScalarType(),
4149-
"Range types must match instruction type!", &I);
4150+
4151+
if (IsAddrSpaceRange) {
4152+
Check(High->getType()->isIntegerTy(32),
4153+
"noalias.addrspace type must be i32!", &I);
4154+
} else {
4155+
Check(High->getType() == Ty->getScalarType(),
4156+
"Range types must match instruction type!", &I);
4157+
}
41504158

41514159
APInt HighV = High->getValue();
41524160
APInt LowV = Low->getValue();
@@ -4185,7 +4193,14 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
41854193
void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
41864194
assert(Range && Range == I.getMetadata(LLVMContext::MD_range) &&
41874195
"precondition violation");
4188-
verifyRangeMetadata(I, Range, Ty, false);
4196+
verifyRangeMetadata(I, Range, Ty, false, false);
4197+
}
4198+
4199+
void Verifier::visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range,
4200+
Type *Ty) {
4201+
assert(Range && Range == I.getMetadata(LLVMContext::MD_noalias_addrspace) &&
4202+
"precondition violation");
4203+
verifyRangeMetadata(I, Range, Ty, false, true);
41894204
}
41904205

41914206
void Verifier::checkAtomicMemAccessSize(Type *Ty, const Instruction *I) {
@@ -5177,6 +5192,13 @@ void Verifier::visitInstruction(Instruction &I) {
51775192
visitRangeMetadata(I, Range, I.getType());
51785193
}
51795194

5195+
if (MDNode *Range = I.getMetadata(LLVMContext::MD_noalias_addrspace)) {
5196+
Check(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicRMWInst>(I) ||
5197+
isa<AtomicCmpXchgInst>(I) || isa<CallInst>(I),
5198+
"noalias.addrspace are only for memory operations!", &I);
5199+
visitNoaliasAddrspaceMetadata(I, Range, I.getType());
5200+
}
5201+
51805202
if (I.hasMetadata(LLVMContext::MD_invariant_group)) {
51815203
Check(isa<LoadInst>(I) || isa<StoreInst>(I),
51825204
"invariant.group metadata is only for loads and stores", &I);
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
2+
3+
define i64 @atomicrmw_noalias_addrspace__0_1(ptr %ptr, i64 %val) {
4+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_1(
5+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
6+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META0:![0-9]+]]
7+
; CHECK-NEXT: ret i64 [[RET]]
8+
;
9+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !0
10+
ret i64 %ret
11+
}
12+
13+
define i64 @atomicrmw_noalias_addrspace__0_2(ptr %ptr, i64 %val) {
14+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_2(
15+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
16+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META1:![0-9]+]]
17+
; CHECK-NEXT: ret i64 [[RET]]
18+
;
19+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !1
20+
ret i64 %ret
21+
}
22+
23+
define i64 @atomicrmw_noalias_addrspace__1_3(ptr %ptr, i64 %val) {
24+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__1_3(
25+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
26+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META2:![0-9]+]]
27+
; CHECK-NEXT: ret i64 [[RET]]
28+
;
29+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !2
30+
ret i64 %ret
31+
}
32+
33+
define i64 @atomicrmw_noalias_addrspace__multiple_ranges(ptr %ptr, i64 %val) {
34+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__multiple_ranges(
35+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
36+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META3:![0-9]+]]
37+
; CHECK-NEXT: ret i64 [[RET]]
38+
;
39+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3
40+
ret i64 %ret
41+
}
42+
43+
define i64 @load_noalias_addrspace__5_6(ptr %ptr) {
44+
; CHECK-LABEL: define i64 @load_noalias_addrspace__5_6(
45+
; CHECK-SAME: ptr [[PTR:%.*]]) {
46+
; CHECK-NEXT: [[RET:%.*]] = load i64, ptr [[PTR]], align 4, !noalias.addrspace [[META4:![0-9]+]]
47+
; CHECK-NEXT: ret i64 [[RET]]
48+
;
49+
%ret = load i64, ptr %ptr, align 4, !noalias.addrspace !4
50+
ret i64 %ret
51+
}
52+
53+
define void @store_noalias_addrspace__5_6(ptr %ptr, i64 %val) {
54+
; CHECK-LABEL: define void @store_noalias_addrspace__5_6(
55+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
56+
; CHECK-NEXT: store i64 [[VAL]], ptr [[PTR]], align 4, !noalias.addrspace [[META4]]
57+
; CHECK-NEXT: ret void
58+
;
59+
store i64 %val, ptr %ptr, align 4, !noalias.addrspace !4
60+
ret void
61+
}
62+
63+
define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(ptr %ptr, i64 %val0, i64 %val1) {
64+
; CHECK-LABEL: define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(
65+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL0:%.*]], i64 [[VAL1:%.*]]) {
66+
; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL0]], i64 [[VAL1]] monotonic monotonic, align 8, !noalias.addrspace [[META4]]
67+
; CHECK-NEXT: ret { i64, i1 } [[RET]]
68+
;
69+
%ret = cmpxchg ptr %ptr, i64 %val0, i64 %val1 monotonic monotonic, align 8, !noalias.addrspace !4
70+
ret { i64, i1 } %ret
71+
}
72+
73+
declare void @foo()
74+
75+
define void @call_noalias_addrspace__5_6(ptr %ptr) {
76+
; CHECK-LABEL: define void @call_noalias_addrspace__5_6(
77+
; CHECK-SAME: ptr [[PTR:%.*]]) {
78+
; CHECK-NEXT: call void @foo(), !noalias.addrspace [[META4]]
79+
; CHECK-NEXT: ret void
80+
;
81+
call void @foo(), !noalias.addrspace !4
82+
ret void
83+
}
84+
85+
define void @call_memcpy_intrinsic_addrspace__5_6(ptr %dst, ptr %src, i64 %size) {
86+
; CHECK-LABEL: define void @call_memcpy_intrinsic_addrspace__5_6(
87+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
88+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false), !noalias.addrspace [[META4]]
89+
; CHECK-NEXT: ret void
90+
;
91+
call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false), !noalias.addrspace !4
92+
ret void
93+
}
94+
95+
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
96+
97+
attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
98+
99+
!0 = !{i32 0, i32 1}
100+
!1 = !{i32 0, i32 2}
101+
!2 = !{i32 1, i32 3}
102+
!3 = !{i32 4, i32 6, i32 10, i32 55}
103+
!4 = !{i32 5, i32 6}
104+
;.
105+
; CHECK: [[META0]] = !{i32 0, i32 1}
106+
; CHECK: [[META1]] = !{i32 0, i32 2}
107+
; CHECK: [[META2]] = !{i32 1, i32 3}
108+
; CHECK: [[META3]] = !{i32 4, i32 6, i32 10, i32 55}
109+
; CHECK: [[META4]] = !{i32 5, i32 6}
110+
;.
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
2+
3+
; CHECK: It should have at least one range!
4+
; CHECK-NEXT: !0 = !{}
5+
define i64 @noalias_addrspace__empty(ptr %ptr, i64 %val) {
6+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !0
7+
ret i64 %ret
8+
}
9+
10+
; CHECK: Unfinished range!
11+
; CHECK-NEXT: !1 = !{i32 0}
12+
define i64 @noalias_addrspace__single_field(ptr %ptr, i64 %val) {
13+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !1
14+
ret i64 %ret
15+
}
16+
17+
; CHECK: Range must not be empty!
18+
; CHECK-NEXT: !2 = !{i32 0, i32 0}
19+
define i64 @noalias_addrspace__0_0(ptr %ptr, i64 %val) {
20+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !2
21+
ret i64 %ret
22+
}
23+
24+
; CHECK: noalias.addrspace type must be i32!
25+
; CHECK-NEXT: %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3
26+
define i64 @noalias_addrspace__i64(ptr %ptr, i64 %val) {
27+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !3
28+
ret i64 %ret
29+
}
30+
31+
; CHECK: The lower limit must be an integer!
32+
define i64 @noalias_addrspace__fp(ptr %ptr, i64 %val) {
33+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !4
34+
ret i64 %ret
35+
}
36+
37+
; CHECK: The lower limit must be an integer!
38+
define i64 @noalias_addrspace__ptr(ptr %ptr, i64 %val) {
39+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !5
40+
ret i64 %ret
41+
}
42+
43+
; CHECK: The lower limit must be an integer!
44+
define i64 @noalias_addrspace__nonconstant(ptr %ptr, i64 %val) {
45+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !6
46+
ret i64 %ret
47+
}
48+
49+
@gv0 = global i32 0
50+
@gv1 = global i32 1
51+
52+
!0 = !{}
53+
!1 = !{i32 0}
54+
!2 = !{i32 0, i32 0}
55+
!3 = !{i64 1, i64 5}
56+
!4 = !{float 0.0, float 2.0}
57+
!5 = !{ptr null, ptr addrspace(1) null}
58+
!6 = !{i32 ptrtoint (ptr @gv0 to i32), i32 ptrtoint (ptr @gv1 to i32) }
59+
60+

0 commit comments

Comments
 (0)