@@ -28,144 +28,145 @@ enum OrderingTy {
   seq_cst = __ATOMIC_SEQ_CST,
 };
 
-enum ScopeTy {
+enum MemScopeTy {
   system = __MEMORY_SCOPE_SYSTEM,
-  device_ = __MEMORY_SCOPE_DEVICE,
+  device = __MEMORY_SCOPE_DEVICE,
   workgroup = __MEMORY_SCOPE_WRKGRP,
   wavefront = __MEMORY_SCOPE_WVFRNT,
   single = __MEMORY_SCOPE_SINGLE,
 };
 
-enum MemScopeTy {
-  all,    // All threads on all devices
-  device, // All threads on the device
-  cgroup  // All threads in the contention group, e.g. the team
-};
-
 /// Atomically increment \p *Addr and wrap at \p V with \p Ordering semantics.
 uint32_t inc(uint32_t *Addr, uint32_t V, OrderingTy Ordering,
-             MemScopeTy MemScope = MemScopeTy::all);
+             MemScopeTy MemScope = MemScopeTy::device);
 
 /// Atomically perform <op> on \p V and \p *Addr with \p Ordering semantics. The
 /// result is stored in \p *Addr;
 ///{
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
 bool cas(Ty *Address, V ExpectedV, V DesiredV, atomic::OrderingTy OrderingSucc,
-         atomic::OrderingTy OrderingFail) {
+         atomic::OrderingTy OrderingFail,
+         MemScopeTy MemScope = MemScopeTy::device) {
   return __scoped_atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false,
-                                          OrderingSucc, OrderingFail,
-                                          __MEMORY_SCOPE_DEVICE);
+                                          OrderingSucc, OrderingFail, MemScope);
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
-V add(Ty *Address, V Val, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_fetch_add(Address, Val, Ordering,
-                                   __MEMORY_SCOPE_DEVICE);
+V add(Ty *Address, V Val, atomic::OrderingTy Ordering,
+      MemScopeTy MemScope = MemScopeTy::device) {
+  return __scoped_atomic_fetch_add(Address, Val, Ordering, MemScope);
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
-V load(Ty *Address, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_load_n(Address, Ordering, __MEMORY_SCOPE_DEVICE);
+V load(Ty *Address, atomic::OrderingTy Ordering,
+       MemScopeTy MemScope = MemScopeTy::device) {
+  return __scoped_atomic_load_n(Address, Ordering, MemScope);
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
-void store(Ty *Address, V Val, atomic::OrderingTy Ordering) {
-  __scoped_atomic_store_n(Address, Val, Ordering, __MEMORY_SCOPE_DEVICE);
+void store(Ty *Address, V Val, atomic::OrderingTy Ordering,
+           MemScopeTy MemScope = MemScopeTy::device) {
+  __scoped_atomic_store_n(Address, Val, Ordering, MemScope);
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
-V mul(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+V mul(Ty *Address, V Val, atomic::OrderingTy Ordering,
+      MemScopeTy MemScope = MemScopeTy::device) {
   Ty TypedCurrentVal, TypedResultVal, TypedNewVal;
   bool Success;
   do {
     TypedCurrentVal = atomic::load(Address, Ordering);
     TypedNewVal = TypedCurrentVal * Val;
     Success = atomic::cas(Address, TypedCurrentVal, TypedNewVal, Ordering,
-                          atomic::relaxed);
+                          atomic::relaxed, MemScope);
   } while (!Success);
   return TypedResultVal;
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
 utils::enable_if_t<!utils::is_floating_point_v<V>, V>
-max(Ty *Address, V Val, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_fetch_max(Address, Val, Ordering,
-                                   __MEMORY_SCOPE_DEVICE);
+max(Ty *Address, V Val, atomic::OrderingTy Ordering,
+    MemScopeTy MemScope = MemScopeTy::device) {
+  return __scoped_atomic_fetch_max(Address, Val, Ordering, MemScope);
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
 utils::enable_if_t<utils::is_same_v<V, float>, V>
-max(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+max(Ty *Address, V Val, atomic::OrderingTy Ordering,
+    MemScopeTy MemScope = MemScopeTy::device) {
   if (Val >= 0)
-    return utils::bitCast<float>(
-        max((int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering));
-  return utils::bitCast<float>(
-      min((uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering));
+    return utils::bitCast<float>(max(
+        (int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering, MemScope));
+  return utils::bitCast<float>(min(
+      (uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering, MemScope));
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
 utils::enable_if_t<utils::is_same_v<V, double>, V>
-max(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+max(Ty *Address, V Val, atomic::OrderingTy Ordering,
+    MemScopeTy MemScope = MemScopeTy::device) {
   if (Val >= 0)
-    return utils::bitCast<double>(
-        max((int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering));
-  return utils::bitCast<double>(
-      min((uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering));
+    return utils::bitCast<double>(max(
+        (int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering, MemScope));
+  return utils::bitCast<double>(min(
+      (uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering, MemScope));
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
 utils::enable_if_t<!utils::is_floating_point_v<V>, V>
-min(Ty *Address, V Val, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_fetch_min(Address, Val, Ordering,
-                                   __MEMORY_SCOPE_DEVICE);
+min(Ty *Address, V Val, atomic::OrderingTy Ordering,
+    MemScopeTy MemScope = MemScopeTy::device) {
+  return __scoped_atomic_fetch_min(Address, Val, Ordering, MemScope);
 }
 
 // TODO: Implement this with __atomic_fetch_max and remove the duplication.
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
 utils::enable_if_t<utils::is_same_v<V, float>, V>
-min(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+min(Ty *Address, V Val, atomic::OrderingTy Ordering,
+    MemScopeTy MemScope = MemScopeTy::device) {
   if (Val >= 0)
-    return utils::bitCast<float>(
-        min((int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering));
-  return utils::bitCast<float>(
-      max((uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering));
+    return utils::bitCast<float>(min(
+        (int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering, MemScope));
+  return utils::bitCast<float>(max(
+      (uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering, MemScope));
 }
 
 // TODO: Implement this with __atomic_fetch_max and remove the duplication.
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
 utils::enable_if_t<utils::is_same_v<V, double>, V>
-min(Ty *Address, utils::remove_addrspace_t<Ty> Val,
-    atomic::OrderingTy Ordering) {
+min(Ty *Address, utils::remove_addrspace_t<Ty> Val, atomic::OrderingTy Ordering,
+    MemScopeTy MemScope = MemScopeTy::device) {
   if (Val >= 0)
-    return utils::bitCast<double>(
-        min((int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering));
-  return utils::bitCast<double>(
-      max((uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering));
+    return utils::bitCast<double>(min(
+        (int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering, MemScope));
+  return utils::bitCast<double>(max(
+      (uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering, MemScope));
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
-V bit_or(Ty *Address, V Val, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_fetch_or(Address, Val, Ordering,
-                                  __MEMORY_SCOPE_DEVICE);
+V bit_or(Ty *Address, V Val, atomic::OrderingTy Ordering,
+         MemScopeTy MemScope = MemScopeTy::device) {
+  return __scoped_atomic_fetch_or(Address, Val, Ordering, MemScope);
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
-V bit_and(Ty *Address, V Val, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_fetch_and(Address, Val, Ordering,
-                                   __MEMORY_SCOPE_DEVICE);
+V bit_and(Ty *Address, V Val, atomic::OrderingTy Ordering,
+          MemScopeTy MemScope = MemScopeTy::device) {
+  return __scoped_atomic_fetch_and(Address, Val, Ordering, MemScope);
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
-V bit_xor(Ty *Address, V Val, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_fetch_xor(Address, Val, Ordering,
-                                   __MEMORY_SCOPE_DEVICE);
+V bit_xor(Ty *Address, V Val, atomic::OrderingTy Ordering,
+          MemScopeTy MemScope = MemScopeTy::device) {
+  return __scoped_atomic_fetch_xor(Address, Val, Ordering, MemScope);
 }
 
-static inline uint32_t atomicExchange(uint32_t *Address, uint32_t Val,
-                                      atomic::OrderingTy Ordering) {
+static inline uint32_t
+atomicExchange(uint32_t *Address, uint32_t Val, atomic::OrderingTy Ordering,
+               MemScopeTy MemScope = MemScopeTy::device) {
   uint32_t R;
-  __scoped_atomic_exchange(Address, &Val, &R, Ordering, __MEMORY_SCOPE_DEVICE);
+  __scoped_atomic_exchange(Address, &Val, &R, Ordering, MemScope);
   return R;
 }
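In short, the patch replaces the hard-coded __MEMORY_SCOPE_DEVICE argument with a caller-selectable MemScopeTy parameter that defaults to MemScopeTy::device, so existing call sites keep their behavior. A minimal sketch of how a device-side caller might use the new parameter follows; it is not part of the patch, TeamCounter, GlobalCounter, and bumpCounters are hypothetical names, and the include is assumed from the surrounding file rather than stated in the diff.

// Hypothetical device-side usage of the widened atomic wrappers (not part of
// the patch). Assumes the functions above live in the DeviceRTL `atomic`
// namespace, as the `atomic::` qualifiers in `cas` and `mul` suggest.
#include "Synchronization.h" // assumed header containing the declarations above

static uint32_t TeamCounter;   // hypothetical: only threads of one team touch it
static uint32_t GlobalCounter; // hypothetical: shared across the whole device

void bumpCounters() {
  // Team-local traffic can opt into the narrower workgroup scope explicitly.
  atomic::add(&TeamCounter, 1u, atomic::seq_cst, atomic::MemScopeTy::workgroup);
  // Omitting the argument keeps the previous behavior via MemScopeTy::device.
  atomic::add(&GlobalCounter, 1u, atomic::seq_cst);
}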