@@ -410,9 +410,11 @@ pthread_mutex_t in_signal_lock; // shared with jl_delete_thread
410410static bt_context_t * signal_context ; // protected by in_signal_lock
411411static int exit_signal_cond = -1 ;
412412static int signal_caught_cond = -1 ;
413+ static int signals_inflight = 0 ;
413414
414415int jl_thread_suspend_and_get_state (int tid , int timeout , bt_context_t * ctx )
415416{
417+ int err ;
416418 pthread_mutex_lock (& in_signal_lock );
417419 jl_ptls_t ptls2 = jl_atomic_load_relaxed (& jl_all_tls_states )[tid ];
418420 jl_task_t * ct2 = ptls2 ? jl_atomic_load_relaxed (& ptls2 -> current_task ) : NULL ;
@@ -421,24 +423,45 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
421423 pthread_mutex_unlock (& in_signal_lock );
422424 return 0 ;
423425 }
424- sig_atomic_t request = 0 ;
425- if (!jl_atomic_cmpswap (& ptls2 -> signal_request , & request , 1 )) {
426+ while (signals_inflight ) {
426427 // something is wrong, or there is already a usr2 in flight elsewhere
427- pthread_mutex_unlock (& in_signal_lock );
428- return 0 ;
428+ // try to wait for it to finish or wait for timeout
429+ struct pollfd event = {signal_caught_cond , POLLIN , 0 };
430+ do {
431+ err = poll (& event , 1 , timeout * 1000 );
432+ } while (err == -1 && errno == EINTR );
433+ if (err == -1 || (event .revents & POLLIN ) == 0 ) {
434+ // not ready after timeout: cancel this request
435+ pthread_mutex_unlock (& in_signal_lock );
436+ return 0 ;
437+ }
438+ // consume it before continuing
439+ eventfd_t got ;
440+ do {
441+ err = read (signal_caught_cond , & got , sizeof (eventfd_t ));
442+ } while (err == -1 && errno == EINTR );
443+ if (err != sizeof (eventfd_t )) abort ();
444+ assert (signals_inflight >= got );
445+ signals_inflight -= got ;
429446 }
447+ signals_inflight ++ ;
448+ sig_atomic_t request = jl_atomic_exchange (& ptls2 -> signal_request , 1 );
449+ assert (request == 0 || request == -1 );
430450 request = 1 ;
431- int err = pthread_kill (ptls2 -> system_id , SIGUSR2 );
432- // wait for thread to acknowledge or timeout
433- struct pollfd event = {signal_caught_cond , POLLIN , 0 };
451+ err = pthread_kill (ptls2 -> system_id , SIGUSR2 );
434452 if (err == 0 ) {
453+ // wait for thread to acknowledge or timeout
454+ struct pollfd event = {signal_caught_cond , POLLIN , 0 };
435455 do {
436456 err = poll (& event , 1 , timeout * 1000 );
437457 } while (err == -1 && errno == EINTR );
458+ if (err != 1 || (event .revents & POLLIN ) == 0 )
459+ err = -1 ;
438460 }
439- if (( event . revents & POLLIN ) == 0 ) {
461+ if (err == -1 ) {
440462 // not ready after timeout: try to cancel this request
441463 if (jl_atomic_cmpswap (& ptls2 -> signal_request , & request , 0 )) {
464+ signals_inflight -- ;
442465 pthread_mutex_unlock (& in_signal_lock );
443466 return 0 ;
444467 }
@@ -448,11 +471,13 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
448471 err = read (signal_caught_cond , & got , sizeof (eventfd_t ));
449472 } while (err == -1 && errno == EINTR );
450473 if (err != sizeof (eventfd_t )) abort ();
451- assert (got == 1 ); (void ) got ;
474+ assert (signals_inflight >= got );
475+ signals_inflight -= got ;
476+ signals_inflight ++ ;
452477 // Now the other thread is waiting on exit_signal_cond (verify that here by
453478 // checking it is 0, and add an acquire barrier for good measure)
454479 request = jl_atomic_load_acquire (& ptls2 -> signal_request );
455- assert (request == 0 ); (void ) request ;
480+ assert (request == 0 || request == -1 ); (void ) request ;
456481 jl_atomic_store_release (& ptls2 -> signal_request , 4 ); // prepare to resume normally, but later code may change this
457482 * ctx = * signal_context ;
458483 return 1 ;
@@ -475,6 +500,7 @@ static void jl_try_deliver_sigint(void)
475500 jl_safepoint_enable_sigint ();
476501 jl_wake_libuv ();
477502 pthread_mutex_lock (& in_signal_lock );
503+ signals_inflight ++ ;
478504 jl_atomic_store_release (& ptls2 -> signal_request , 2 );
479505 // This also makes sure `sleep` is aborted.
480506 pthread_kill (ptls2 -> system_id , SIGUSR2 );
@@ -511,6 +537,7 @@ static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size)
511537}
512538
513539// request:
540+ // -1: processing
514541// 0: nothing [not from here]
515542// 1: get state & wait for request
516543// 2: throw sigint if `!defer_signal && io_wait` or if force throw threshold
@@ -526,22 +553,36 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
526553 if (ptls == NULL )
527554 return ;
528555 int errno_save = errno ;
529- // acknowledge that we saw the signal_request
530- sig_atomic_t request = jl_atomic_exchange (& ptls -> signal_request , 0 );
556+ sig_atomic_t request = jl_atomic_load (& ptls -> signal_request );
557+ if (request == 0 )
558+ return ;
559+ if (!jl_atomic_cmpswap (& ptls -> signal_request , & request , -1 ))
560+ return ;
531561 if (request == 1 ) {
532562 signal_context = jl_to_bt_context (ctx );
563+ // acknowledge that we saw the signal_request and set signal_context
533564 int err ;
534565 eventfd_t got = 1 ;
535566 err = write (signal_caught_cond , & got , sizeof (eventfd_t ));
536567 if (err != sizeof (eventfd_t )) abort ();
568+ sig_atomic_t processing = -1 ;
569+ jl_atomic_cmpswap (& ptls -> signal_request , & processing , 0 );
570+ // wait for exit signal
537571 do {
538572 err = read (exit_signal_cond , & got , sizeof (eventfd_t ));
539573 } while (err == -1 && errno == EINTR );
540574 if (err != sizeof (eventfd_t )) abort ();
541575 assert (got == 1 );
542- request = jl_atomic_exchange (& ptls -> signal_request , 0 );
576+ request = jl_atomic_exchange (& ptls -> signal_request , -1 );
577+ signal_context = NULL ;
543578 assert (request == 2 || request == 3 || request == 4 );
544579 }
580+ int err ;
581+ eventfd_t got = 1 ;
582+ err = write (signal_caught_cond , & got , sizeof (eventfd_t ));
583+ if (err != sizeof (eventfd_t )) abort ();
584+ sig_atomic_t processing = -1 ;
585+ jl_atomic_cmpswap (& ptls -> signal_request , & processing , 0 );
545586 if (request == 2 ) {
546587 int force = jl_check_force_sigint ();
547588 if (force || (!ptls -> defer_signal && ptls -> io_wait )) {
0 commit comments