Skip to content

Commit bea0115

Browse files
authored
Merge pull request #5418 from hjelmn/v3.1.x_power9
v3.1: opal/fifo: fix 128-bit atomic fifo on Power9
2 parents e9c1ef5 + f669455 commit bea0115

File tree

2 files changed

+39
-26
lines changed

2 files changed

+39
-26
lines changed

opal/class/opal_fifo.h

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,12 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
8686
opal_list_item_t *item)
8787
{
8888
opal_counted_pointer_t tail;
89+
const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost;
8990

9091
item->opal_list_next = &fifo->opal_fifo_ghost;
9192

93+
opal_atomic_wmb ();
94+
9295
do {
9396
tail.value = fifo->opal_fifo_tail.value;
9497

@@ -99,7 +102,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
99102

100103
opal_atomic_wmb ();
101104

102-
if (&fifo->opal_fifo_ghost == tail.data.item) {
105+
if (ghost == tail.data.item) {
103106
/* update the head */
104107
opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value};
105108
opal_update_counted_pointer (&fifo->opal_fifo_head, head, item);
@@ -116,24 +119,23 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
116119
*/
117120
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
118121
{
119-
opal_list_item_t *item, *next;
122+
opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost;
120123
opal_counted_pointer_t head, tail;
121124

122125
do {
123-
head.value = fifo->opal_fifo_head.value;
126+
opal_read_counted_pointer (&fifo->opal_fifo_head, &head);
124127
tail.value = fifo->opal_fifo_tail.value;
125128
opal_atomic_rmb ();
126129

127130
item = (opal_list_item_t *) head.data.item;
128131
next = (opal_list_item_t *) item->opal_list_next;
129132

130-
if (&fifo->opal_fifo_ghost == tail.data.item && &fifo->opal_fifo_ghost == item) {
133+
if (ghost == tail.data.item && ghost == item) {
131134
return NULL;
132135
}
133136

134137
/* the head or next pointer are in an inconsistent state. keep looping. */
135-
if (tail.data.item != item && &fifo->opal_fifo_ghost != tail.data.item &&
136-
&fifo->opal_fifo_ghost == next) {
138+
if (tail.data.item != item && ghost != tail.data.item && ghost == next) {
137139
continue;
138140
}
139141

@@ -146,14 +148,14 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
146148
opal_atomic_wmb ();
147149

148150
/* check for tail and head consistency */
149-
if (&fifo->opal_fifo_ghost == next) {
151+
if (ghost == next) {
150152
/* the head was just set to &fifo->opal_fifo_ghost. try to update the tail as well */
151-
if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, &fifo->opal_fifo_ghost)) {
153+
if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, ghost)) {
152154
/* tail was changed by a push operation. wait for the item's next pointer to be set, then
153155
* update the head */
154156

155157
/* wait for next pointer to be updated by push */
156-
while (&fifo->opal_fifo_ghost == item->opal_list_next) {
158+
while (ghost == item->opal_list_next) {
157159
opal_atomic_rmb ();
158160
}
159161

@@ -166,7 +168,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
166168
head.value = fifo->opal_fifo_head.value;
167169
next = (opal_list_item_t *) item->opal_list_next;
168170

169-
assert (&fifo->opal_fifo_ghost == head.data.item);
171+
assert (ghost == head.data.item);
170172

171173
fifo->opal_fifo_head.data.item = next;
172174
opal_atomic_wmb ();

opal/class/opal_lifo.h

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,33 @@ static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t
7474
return opal_atomic_cmpset_128 (&addr->value, old.value, new_p.value);
7575
}
7676

77+
__opal_attribute_always_inline__
78+
static inline void opal_read_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t *value)
79+
{
80+
/* most platforms do not read the value atomically so make sure we read the counted pointer in a specific order */
81+
value->data.counter = addr->data.counter;
82+
opal_atomic_rmb ();
83+
value->data.item = addr->data.item;
84+
}
85+
7786
#endif
7887

88+
/**
89+
* @brief Helper function for lifo/fifo to sleep this thread if excessive contention is detected
90+
*/
91+
static inline void _opal_lifo_release_cpu (void)
92+
{
93+
/* NTH: there are many ways to cause the current thread to be suspended. This one
94+
* should work well in most cases. Another approach would be to use poll (NULL, 0, ) but
95+
* the interval will be forced to be in ms (instead of ns or us). Note that there
96+
* is a performance improvement for the lifo test when this call is made on detection
97+
* of contention but it may not translate into actual MPI or application performance
98+
* improvements. */
99+
static struct timespec interval = { .tv_sec = 0, .tv_nsec = 100 };
100+
nanosleep (&interval, NULL);
101+
}
102+
103+
79104
/* Atomic Last In First Out lists. If we are in a multi-threaded environment then the
80105
* atomicity is ensured via the compare-and-swap operation, if not we simply do a read
81106
* and/or a write.
@@ -142,10 +167,8 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
142167
opal_list_item_t *item;
143168

144169
do {
145-
146-
old_head.data.counter = lifo->opal_lifo_head.data.counter;
147-
opal_atomic_rmb ();
148-
old_head.data.item = item = (opal_list_item_t*)lifo->opal_lifo_head.data.item;
170+
opal_read_counted_pointer (&lifo->opal_lifo_head, &old_head);
171+
item = (opal_list_item_t *) old_head.data.item;
149172

150173
if (item == &lifo->opal_lifo_ghost) {
151174
return NULL;
@@ -187,18 +210,6 @@ static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo,
187210

188211
#if OPAL_HAVE_ATOMIC_LLSC_PTR
189212

190-
static inline void _opal_lifo_release_cpu (void)
191-
{
192-
/* NTH: there are many ways to cause the current thread to be suspended. This one
193-
* should work well in most cases. Another approach would be to use poll (NULL, 0, ) but
194-
* the interval will be forced to be in ms (instead of ns or us). Note that there
195-
* is a performance improvement for the lifo test when this call is made on detection
196-
* of contention but it may not translate into actual MPI or application performance
197-
* improvements. */
198-
static struct timespec interval = { .tv_sec = 0, .tv_nsec = 100 };
199-
nanosleep (&interval, NULL);
200-
}
201-
202213
/* Retrieve one element from the LIFO. If we reach the ghost element then the LIFO
203214
* is empty so we return NULL.
204215
*/

0 commit comments

Comments
 (0)