Skip to content

Commit 3024c05

Browse files
authored
[3.6] bpo-30703: Improve signal delivery (GH-2415) (#2527)
* [3.6] bpo-30703: Improve signal delivery (GH-2415) * Improve signal delivery Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-unsafe functions. * Remove unused function * Improve comments * Add stress test * Adapt for --without-threads * Add second stress test * Add NEWS blurb * Address comments @Haypo. (cherry picked from commit c08177a) * bpo-30796: Fix failures in signal delivery stress test (#2488) * bpo-30796: Fix failures in signal delivery stress test setitimer() can have a poor minimum resolution on some machines, this would make the test reach its deadline (and a stray signal could then kill a subsequent test). * Make sure to clear the itimer after the test
1 parent 48290c1 commit 3024c05

File tree

5 files changed

+207
-38
lines changed

5 files changed

+207
-38
lines changed

Include/ceval.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf);
4646
#endif
4747

4848
PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg);
49+
PyAPI_FUNC(void) _PyEval_SignalReceived(void);
4950
PyAPI_FUNC(int) Py_MakePendingCalls(void);
5051

5152
/* Protection against deeply nested recursive calls

Lib/test/test_signal.py

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
from contextlib import closing
44
import enum
55
import gc
6+
import os
67
import pickle
8+
import random
79
import select
810
import signal
911
import socket
10-
import struct
12+
import statistics
1113
import subprocess
1214
import traceback
1315
import sys, os, time, errno
@@ -955,6 +957,135 @@ def handler(signum, frame):
955957
(exitcode, stdout))
956958

957959

960+
class StressTest(unittest.TestCase):
961+
"""
962+
Stress signal delivery, especially when a signal arrives in
963+
the middle of recomputing the signal state or executing
964+
previously tripped signal handlers.
965+
"""
966+
967+
def setsig(self, signum, handler):
968+
old_handler = signal.signal(signum, handler)
969+
self.addCleanup(signal.signal, signum, old_handler)
970+
971+
def measure_itimer_resolution(self):
972+
N = 20
973+
times = []
974+
975+
def handler(signum=None, frame=None):
976+
if len(times) < N:
977+
times.append(time.perf_counter())
978+
# 1 µs is the smallest possible timer interval,
979+
# we want to measure what the concrete duration
980+
# will be on this platform
981+
signal.setitimer(signal.ITIMER_REAL, 1e-6)
982+
983+
self.addCleanup(signal.setitimer, signal.ITIMER_REAL, 0)
984+
self.setsig(signal.SIGALRM, handler)
985+
handler()
986+
while len(times) < N:
987+
time.sleep(1e-3)
988+
989+
durations = [times[i+1] - times[i] for i in range(len(times) - 1)]
990+
med = statistics.median(durations)
991+
if support.verbose:
992+
print("detected median itimer() resolution: %.6f s." % (med,))
993+
return med
994+
995+
def decide_itimer_count(self):
996+
# Some systems have poor setitimer() resolution (for example
997+
# measured around 20 ms. on FreeBSD 9), so decide on a reasonable
998+
# number of sequential timers based on that.
999+
reso = self.measure_itimer_resolution()
1000+
if reso <= 1e-4:
1001+
return 10000
1002+
elif reso <= 1e-2:
1003+
return 100
1004+
else:
1005+
self.skipTest("detected itimer resolution (%.3f s.) too high "
1006+
"(> 10 ms.) on this platform (or system too busy)"
1007+
% (reso,))
1008+
1009+
@unittest.skipUnless(hasattr(signal, "setitimer"),
1010+
"test needs setitimer()")
1011+
def test_stress_delivery_dependent(self):
1012+
"""
1013+
This test uses dependent signal handlers.
1014+
"""
1015+
N = self.decide_itimer_count()
1016+
sigs = []
1017+
1018+
def first_handler(signum, frame):
1019+
# 1e-6 is the minimum non-zero value for `setitimer()`.
1020+
# Choose a random delay so as to improve chances of
1021+
# triggering a race condition. Ideally the signal is received
1022+
# when inside critical signal-handling routines such as
1023+
# Py_MakePendingCalls().
1024+
signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
1025+
1026+
def second_handler(signum=None, frame=None):
1027+
sigs.append(signum)
1028+
1029+
# Here on Linux, SIGPROF > SIGALRM > SIGUSR1. By using both
1030+
# ascending and descending sequences (SIGUSR1 then SIGALRM,
1031+
# SIGPROF then SIGALRM), we maximize chances of hitting a bug.
1032+
self.setsig(signal.SIGPROF, first_handler)
1033+
self.setsig(signal.SIGUSR1, first_handler)
1034+
self.setsig(signal.SIGALRM, second_handler) # for ITIMER_REAL
1035+
1036+
expected_sigs = 0
1037+
deadline = time.time() + 15.0
1038+
1039+
while expected_sigs < N:
1040+
os.kill(os.getpid(), signal.SIGPROF)
1041+
expected_sigs += 1
1042+
# Wait for handlers to run to avoid signal coalescing
1043+
while len(sigs) < expected_sigs and time.time() < deadline:
1044+
time.sleep(1e-5)
1045+
1046+
os.kill(os.getpid(), signal.SIGUSR1)
1047+
expected_sigs += 1
1048+
while len(sigs) < expected_sigs and time.time() < deadline:
1049+
time.sleep(1e-5)
1050+
1051+
# All ITIMER_REAL signals should have been delivered to the
1052+
# Python handler
1053+
self.assertEqual(len(sigs), N, "Some signals were lost")
1054+
1055+
@unittest.skipUnless(hasattr(signal, "setitimer"),
1056+
"test needs setitimer()")
1057+
def test_stress_delivery_simultaneous(self):
1058+
"""
1059+
This test uses simultaneous signal handlers.
1060+
"""
1061+
N = self.decide_itimer_count()
1062+
sigs = []
1063+
1064+
def handler(signum, frame):
1065+
sigs.append(signum)
1066+
1067+
self.setsig(signal.SIGUSR1, handler)
1068+
self.setsig(signal.SIGALRM, handler) # for ITIMER_REAL
1069+
1070+
expected_sigs = 0
1071+
deadline = time.time() + 15.0
1072+
1073+
while expected_sigs < N:
1074+
# Hopefully the SIGALRM will be received somewhere during
1075+
# initial processing of SIGUSR1.
1076+
signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
1077+
os.kill(os.getpid(), signal.SIGUSR1)
1078+
1079+
expected_sigs += 2
1080+
# Wait for handlers to run to avoid signal coalescing
1081+
while len(sigs) < expected_sigs and time.time() < deadline:
1082+
time.sleep(1e-5)
1083+
1084+
# All ITIMER_REAL signals should have been delivered to the
1085+
# Python handler
1086+
self.assertEqual(len(sigs), N, "Some signals were lost")
1087+
1088+
9581089
def tearDownModule():
9591090
support.reap_children()
9601091

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Improve signal delivery.
2+
3+
Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-
4+
unsafe functions. The tests I'm adding here fail without the rest of the
5+
patch, on Linux and OS X. This means our signal delivery logic had defects
6+
(some signals could be lost).

Modules/signalmodule.c

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -192,12 +192,6 @@ The default handler for SIGINT installed by Python.\n\
192192
It raises KeyboardInterrupt.");
193193

194194

195-
static int
196-
checksignals_witharg(void * unused)
197-
{
198-
return PyErr_CheckSignals();
199-
}
200-
201195
static int
202196
report_wakeup_write_error(void *data)
203197
{
@@ -248,17 +242,15 @@ trip_signal(int sig_num)
248242

249243
Handlers[sig_num].tripped = 1;
250244

251-
if (!is_tripped) {
252-
/* Set is_tripped after setting .tripped, as it gets
253-
cleared in PyErr_CheckSignals() before .tripped. */
254-
is_tripped = 1;
255-
Py_AddPendingCall(checksignals_witharg, NULL);
256-
}
245+
/* Set is_tripped after setting .tripped, as it gets
246+
cleared in PyErr_CheckSignals() before .tripped. */
247+
is_tripped = 1;
248+
_PyEval_SignalReceived();
257249

258250
/* And then write to the wakeup fd *after* setting all the globals and
259-
doing the Py_AddPendingCall. We used to write to the wakeup fd and then
260-
set the flag, but this allowed the following sequence of events
261-
(especially on windows, where trip_signal runs in a new thread):
251+
doing the _PyEval_SignalReceived. We used to write to the wakeup fd
252+
and then set the flag, but this allowed the following sequence of events
253+
(especially on windows, where trip_signal may run in a new thread):
262254
263255
- main thread blocks on select([wakeup_fd], ...)
264256
- signal arrives
@@ -293,6 +285,8 @@ trip_signal(int sig_num)
293285
wakeup.send_err_set = 1;
294286
wakeup.send_errno = errno;
295287
wakeup.send_win_error = GetLastError();
288+
/* Py_AddPendingCall() isn't signal-safe, but we
289+
still use it for this exceptional case. */
296290
Py_AddPendingCall(report_wakeup_send_error, NULL);
297291
}
298292
}
@@ -306,6 +300,8 @@ trip_signal(int sig_num)
306300
rc = _Py_write_noraise(fd, &byte, 1);
307301

308302
if (rc < 0) {
303+
/* Py_AddPendingCall() isn't signal-safe, but we
304+
still use it for this exceptional case. */
309305
Py_AddPendingCall(report_wakeup_write_error,
310306
(void *)(intptr_t)errno);
311307
}
@@ -1555,8 +1551,10 @@ PyErr_CheckSignals(void)
15551551
arglist);
15561552
Py_DECREF(arglist);
15571553
}
1558-
if (!result)
1554+
if (!result) {
1555+
is_tripped = 1;
15591556
return -1;
1557+
}
15601558

15611559
Py_DECREF(result);
15621560
}

Python/ceval.c

Lines changed: 54 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,15 @@ PyEval_GetCallStats(PyObject *self)
195195
do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)
196196

197197

198+
/* This single variable consolidates all requests to break out of the fast path
199+
in the eval loop. */
200+
static _Py_atomic_int eval_breaker = {0};
201+
/* Request for running pending calls. */
202+
static _Py_atomic_int pendingcalls_to_do = {0};
203+
/* Request for looking at the `async_exc` field of the current thread state.
204+
Guarded by the GIL. */
205+
static int pending_async_exc = 0;
206+
198207
#ifdef WITH_THREAD
199208

200209
#ifdef HAVE_ERRNO_H
@@ -204,16 +213,8 @@ PyEval_GetCallStats(PyObject *self)
204213

205214
static PyThread_type_lock pending_lock = 0; /* for pending calls */
206215
static long main_thread = 0;
207-
/* This single variable consolidates all requests to break out of the fast path
208-
in the eval loop. */
209-
static _Py_atomic_int eval_breaker = {0};
210216
/* Request for dropping the GIL */
211217
static _Py_atomic_int gil_drop_request = {0};
212-
/* Request for running pending calls. */
213-
static _Py_atomic_int pendingcalls_to_do = {0};
214-
/* Request for looking at the `async_exc` field of the current thread state.
215-
Guarded by the GIL. */
216-
static int pending_async_exc = 0;
217218

218219
#include "ceval_gil.h"
219220

@@ -326,9 +327,6 @@ PyEval_ReInitThreads(void)
326327
_PyThreadState_DeleteExcept(current_tstate);
327328
}
328329

329-
#else
330-
static _Py_atomic_int eval_breaker = {0};
331-
static int pending_async_exc = 0;
332330
#endif /* WITH_THREAD */
333331

334332
/* This function is used to signal that async exceptions are waiting to be
@@ -403,6 +401,15 @@ PyEval_RestoreThread(PyThreadState *tstate)
403401
#endif
404402
*/
405403

404+
void
405+
_PyEval_SignalReceived(void)
406+
{
407+
/* bpo-30703: Function called when the C signal handler of Python gets a
408+
signal. We cannot queue a callback using Py_AddPendingCall() since
409+
that function is not async-signal-safe. */
410+
SIGNAL_PENDING_CALLS();
411+
}
412+
406413
#ifdef WITH_THREAD
407414

408415
/* The WITH_THREAD implementation is thread-safe. It allows
@@ -467,6 +474,8 @@ Py_MakePendingCalls(void)
467474
int i;
468475
int r = 0;
469476

477+
assert(PyGILState_Check());
478+
470479
if (!pending_lock) {
471480
/* initial allocation of the lock */
472481
pending_lock = PyThread_allocate_lock();
@@ -481,6 +490,16 @@ Py_MakePendingCalls(void)
481490
if (busy)
482491
return 0;
483492
busy = 1;
493+
/* unsignal before starting to call callbacks, so that any callback
494+
added in-between re-signals */
495+
UNSIGNAL_PENDING_CALLS();
496+
497+
/* Python signal handler doesn't really queue a callback: it only signals
498+
that a signal was received, see _PyEval_SignalReceived(). */
499+
if (PyErr_CheckSignals() < 0) {
500+
goto error;
501+
}
502+
484503
/* perform a bounded number of calls, in case of recursion */
485504
for (i=0; i<NPENDINGCALLS; i++) {
486505
int j;
@@ -497,20 +516,23 @@ Py_MakePendingCalls(void)
497516
arg = pendingcalls[j].arg;
498517
pendingfirst = (j + 1) % NPENDINGCALLS;
499518
}
500-
if (pendingfirst != pendinglast)
501-
SIGNAL_PENDING_CALLS();
502-
else
503-
UNSIGNAL_PENDING_CALLS();
504519
PyThread_release_lock(pending_lock);
505520
/* having released the lock, perform the callback */
506521
if (func == NULL)
507522
break;
508523
r = func(arg);
509-
if (r)
510-
break;
524+
if (r) {
525+
goto error;
526+
}
511527
}
528+
512529
busy = 0;
513530
return r;
531+
532+
error:
533+
busy = 0;
534+
SIGNAL_PENDING_CALLS(); /* We're not done yet */
535+
return -1;
514536
}
515537

516538
#else /* if ! defined WITH_THREAD */
@@ -545,7 +567,6 @@ static struct {
545567
} pendingcalls[NPENDINGCALLS];
546568
static volatile int pendingfirst = 0;
547569
static volatile int pendinglast = 0;
548-
static _Py_atomic_int pendingcalls_to_do = {0};
549570

550571
int
551572
Py_AddPendingCall(int (*func)(void *), void *arg)
@@ -579,7 +600,16 @@ Py_MakePendingCalls(void)
579600
if (busy)
580601
return 0;
581602
busy = 1;
603+
604+
/* unsignal before starting to call callbacks, so that any callback
605+
added in-between re-signals */
582606
UNSIGNAL_PENDING_CALLS();
607+
/* Python signal handler doesn't really queue a callback: it only signals
608+
that a signal was received, see _PyEval_SignalReceived(). */
609+
if (PyErr_CheckSignals() < 0) {
610+
goto error;
611+
}
612+
583613
for (;;) {
584614
int i;
585615
int (*func)(void *);
@@ -591,13 +621,16 @@ Py_MakePendingCalls(void)
591621
arg = pendingcalls[i].arg;
592622
pendingfirst = (i + 1) % NPENDINGCALLS;
593623
if (func(arg) < 0) {
594-
busy = 0;
595-
SIGNAL_PENDING_CALLS(); /* We're not done yet */
596-
return -1;
624+
goto error:
597625
}
598626
}
599627
busy = 0;
600628
return 0;
629+
630+
error:
631+
busy = 0;
632+
SIGNAL_PENDING_CALLS(); /* We're not done yet */
633+
return -1;
601634
}
602635

603636
#endif /* WITH_THREAD */

0 commit comments

Comments
 (0)