Skip to content

Commit c923c34

Browse files
authored
bpo-36719: Fix regrtest MultiprocessThread (GH-13301)
MultiprocessThread.kill() now closes stdout and stderr to prevent popen.communicate() to hang.
1 parent 1a10a6b commit c923c34

File tree

1 file changed

+55
-4
lines changed

1 file changed

+55
-4
lines changed

Lib/test/libregrtest/runtest_mp.py

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
# Display the running tests if nothing happened last N seconds
2222
PROGRESS_UPDATE = 30.0 # seconds
2323

24+
# Time to wait until a worker completes: should be immediate
25+
JOIN_TIMEOUT = 30.0 # seconds
26+
2427

2528
def must_stop(result, ns):
2629
if result.result == INTERRUPTED:
@@ -91,6 +94,10 @@ def stop(self):
9194
MultiprocessResult = collections.namedtuple('MultiprocessResult',
9295
'result stdout stderr error_msg')
9396

97+
class ExitThread(Exception):
98+
pass
99+
100+
94101
class MultiprocessThread(threading.Thread):
95102
def __init__(self, pending, output, ns):
96103
super().__init__()
@@ -100,13 +107,31 @@ def __init__(self, pending, output, ns):
100107
self.current_test_name = None
101108
self.start_time = None
102109
self._popen = None
110+
self._killed = False
111+
112+
def __repr__(self):
113+
info = ['MultiprocessThread']
114+
test = self.current_test_name
115+
if self.is_alive():
116+
info.append('alive')
117+
if test:
118+
info.append(f'test={test}')
119+
popen = self._popen
120+
if popen:
121+
info.append(f'pid={popen.pid}')
122+
return '<%s>' % ' '.join(info)
103123

104124
def kill(self):
125+
self._killed = True
126+
105127
popen = self._popen
106128
if popen is None:
107129
return
108-
print("Kill regrtest worker process %s" % popen.pid)
109130
popen.kill()
131+
# stdout and stderr must be closed to ensure that communicate()
132+
# does not hang
133+
popen.stdout.close()
134+
popen.stderr.close()
110135

111136
def _runtest(self, test_name):
112137
try:
@@ -117,7 +142,21 @@ def _runtest(self, test_name):
117142
popen = self._popen
118143
with popen:
119144
try:
120-
stdout, stderr = popen.communicate()
145+
if self._killed:
146+
# If kill() has been called before self._popen is set,
147+
# self._popen is still running. Call again kill()
148+
# to ensure that the process is killed.
149+
self.kill()
150+
raise ExitThread
151+
152+
try:
153+
stdout, stderr = popen.communicate()
154+
except OSError:
155+
if self._killed:
156+
# kill() has been called: communicate() fails
157+
# on reading closed stdout/stderr
158+
raise ExitThread
159+
raise
121160
except:
122161
self.kill()
123162
popen.wait()
@@ -154,7 +193,7 @@ def _runtest(self, test_name):
154193
return MultiprocessResult(result, stdout, stderr, err_msg)
155194

156195
def run(self):
157-
while True:
196+
while not self._killed:
158197
try:
159198
try:
160199
test_name = next(self.pending)
@@ -166,6 +205,8 @@ def run(self):
166205

167206
if must_stop(mp_result.result, self.ns):
168207
break
208+
except ExitThread:
209+
break
169210
except BaseException:
170211
self.output.put((True, traceback.format_exc()))
171212
break
@@ -205,10 +246,20 @@ def start_workers(self):
205246
worker.start()
206247

207248
def wait_workers(self):
249+
start_time = time.monotonic()
208250
for worker in self.workers:
209251
worker.kill()
210252
for worker in self.workers:
211-
worker.join()
253+
while True:
254+
worker.join(1.0)
255+
if not worker.is_alive():
256+
break
257+
dt = time.monotonic() - start_time
258+
print("Wait for regrtest worker %r for %.1f sec" % (worker, dt))
259+
if dt > JOIN_TIMEOUT:
260+
print("Warning -- failed to join a regrtest worker %s"
261+
% worker)
262+
break
212263

213264
def _get_result(self):
214265
if not any(worker.is_alive() for worker in self.workers):

0 commit comments

Comments
 (0)