@@ -168,23 +168,8 @@ opal_progress_finalize(void)
168168 return OPAL_SUCCESS ;
169169}
170170
171-
172- /*
173- * Progress the event library and any functions that have registered to
174- * be called. We don't propogate errors from the progress functions,
175- * so no action is taken if they return failures. The functions are
176- * expected to return the number of events progressed, to determine
177- * whether or not we should call sched_yield() during MPI progress.
178- * This is only losely tracked, as an error return can cause the number
179- * of progressed events to appear lower than it actually is. We don't
180- * care, as the cost of that happening is far outweighed by the cost
181- * of the if checks (they were resulting in bad pipe stalling behavior)
182- */
183- void
184- opal_progress (void )
171+ static int opal_progress_events ()
185172{
186- static volatile uint32_t num_calls = 0 ;
187- size_t i ;
188173 int events = 0 ;
189174
190175 if ( opal_progress_event_flag != 0 ) {
@@ -217,16 +202,44 @@ opal_progress(void)
217202#endif /* OPAL_HAVE_WORKING_EVENTOPS */
218203 }
219204
205+ return events ;
206+ }
207+
208+ /*
209+ * Progress the event library and any functions that have registered to
210+ * be called. We don't propagate errors from the progress functions,
211+ * so no action is taken if they return failures. The functions are
212+ * expected to return the number of events progressed, to determine
213+ * whether or not we should call sched_yield() during MPI progress.
214+ * This is only loosely tracked, as an error return can cause the number
215+ * of progressed events to appear lower than it actually is. We don't
216+ * care, as the cost of that happening is far outweighed by the cost
217+ * of the if checks (they were resulting in bad pipe stalling behavior)
218+ */
219+ void
220+ opal_progress (void )
221+ {
222+ static volatile uint32_t num_calls = 0 ;
223+ size_t i ;
224+ int events = 0 ;
225+
220226 /* progress all registered callbacks */
221227 for (i = 0 ; i < callbacks_len ; ++ i ) {
222228 events += (callbacks [i ])();
223229 }
224230
225- if (callbacks_lp_len > 0 && (OPAL_THREAD_ADD_FETCH32 ((volatile int32_t * ) & num_calls , 1 ) & 0x7 ) == 0 ) {
226- /* run low priority callbacks once every 8 calls to opal_progress() */
231+ /* Run low priority callbacks and events once every 8 calls to opal_progress().
232+ * Even though "num_calls" can be modified by multiple threads, we do not use
233+ * atomic operations here, for performance reasons. In case of a race, the
234+ * number of calls may be inaccurate, but since it will eventually be incremented,
235+ * it's not a problem.
236+ */
237+ if (((num_calls ++ ) & 0x7 ) == 0 ) {
227238 for (i = 0 ; i < callbacks_lp_len ; ++ i ) {
228239 events += (callbacks_lp [i ])();
229240 }
241+
242+ opal_progress_events ();
230243 }
231244
232245#if OPAL_HAVE_SCHED_YIELD
0 commit comments