Skip to content

Commit 1f6d7ba

Browse files
avtikhon authored and Totktonada committed
Add test_timeout to limit test run time
Add a 'test-timeout' option that breaks the test process with a kill signal if the test runs longer than the given number of seconds. The default is 110 seconds. The value should be greater than 'replication-sync-timeout' (100 seconds by default) and lower than 'no-output-timeout' (120 seconds by default). Previously a hanging test ran until 'no-output-timeout' was reached, at which point the whole testing run exited. Now, if a test hangs, 'test-timeout' terminates the test processes first, which gives the test-run worker a chance to either restart the failed test or continue with the remaining tests in its queue. Before this fix, tests hung as in [1] and [2]; with it, the same issues are handled as shown in [3] and [4], respectively. To reproduce an issue like [2], set 'test-timeout' too low for the test to complete the 'restart server ...' command, for example: ./test-run.py replication/quorum.test.lua --test-timeout 5 \ --no-output-timeout 10 --conf memtx This commit implements terminating stuck AppServer instances with SIGKILL. However, there are still problems with stopping and waiting for non-default instances; they will be resolved in the following PRs / commits. See PR #244 for details. Part of #157 [1] - https://gitlab.com/tarantool/tarantool/-/jobs/835734706#L4968 [2] - https://gitlab.com/tarantool/tarantool/-/jobs/822649038#L4835 [3] - https://gitlab.com/tarantool/tarantool/-/jobs/874058059#L4993 [4] - https://gitlab.com/tarantool/tarantool/-/jobs/874058745#L5316
1 parent e843552 commit 1f6d7ba

File tree

3 files changed

+35
-5
lines changed

3 files changed

+35
-5
lines changed

lib/app_server.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,9 @@
66

77
from gevent.subprocess import Popen, PIPE
88

9+
from lib.colorer import color_stdout
910
from lib.colorer import color_log
11+
from lib.options import Options
1012
from lib.preprocessor import TestState
1113
from lib.server import Server
1214
from lib.server import DEFAULT_SNAPSHOT_NAME
@@ -17,12 +19,22 @@
1719
from lib.utils import format_process
1820
from lib.utils import warn_unix_socket
1921
from test import TestRunGreenlet, TestExecutionError
22+
from threading import Timer
23+
24+
25+
def timeout_handler(server_process, test_timeout):
26+
color_stdout("Test timeout of %d secs reached\t" % test_timeout, schema='error')
27+
server_process.kill()
2028

2129

2230
def run_server(execs, cwd, server, logfile, retval):
2331
os.putenv("LISTEN", server.iproto)
2432
server.process = Popen(execs, stdout=PIPE, stderr=PIPE, cwd=cwd)
33+
test_timeout = Options().args.test_timeout
34+
timer = Timer(test_timeout, timeout_handler, (server.process, test_timeout))
35+
timer.start()
2536
stdout, stderr = server.process.communicate()
37+
timer.cancel()
2638
sys.stdout.write(stdout)
2739
with open(logfile, 'a') as f:
2840
f.write(stderr)

lib/options.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -190,6 +190,14 @@ def __init__(self):
190190
Such files created by workers in the "var/reproduce" directory.
191191
Note: The option works now only with parallel testing.""")
192192

193+
parser.add_argument(
194+
"--test-timeout",
195+
dest="test_timeout",
196+
default=110,
197+
type=int,
198+
help="""Break the test process with kill signal if the test runs
199+
longer than this amount of seconds. Default: 110 [seconds].""")
200+
193201
parser.add_argument(
194202
"--no-output-timeout",
195203
dest="no_output_timeout",

lib/tarantool_server.py

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
import yaml
1515

1616
from gevent import socket
17+
from gevent import Timeout
1718
from greenlet import GreenletExit
1819
from threading import Timer
1920

@@ -27,6 +28,7 @@
2728
from lib.colorer import color_stdout
2829
from lib.colorer import color_log
2930
from lib.colorer import qa_notice
31+
from lib.options import Options
3032
from lib.preprocessor import TestState
3133
from lib.server import Server
3234
from lib.server import DEFAULT_SNAPSHOT_NAME
@@ -44,12 +46,21 @@
4446
def save_join(green_obj, timeout=None):
4547
"""
4648
Gevent join wrapper for
47-
test-run stop-on-crash feature
49+
test-run stop-on-crash/stop-on-timeout feature
4850
49-
:return True in case of crash and False otherwise
51+
:return True in case of crash or test timeout and False otherwise
5052
"""
5153
try:
52-
green_obj.join(timeout=timeout)
54+
green_obj.get(timeout=timeout)
55+
except Timeout:
56+
color_stdout("Test timeout of %d secs reached\t" % timeout, schema='error')
57+
# We should kill the greenlet that writes to a temporary
58+
# result file. If the same test is run several times (e.g.
59+
# on different configurations), this greenlet may wake up
60+
# and write to the temporary result file of the new run of
61+
# the test.
62+
green_obj.kill()
63+
return True
5364
except GreenletExit:
5465
return True
5566
# We don't catch TarantoolStartError here to propagate it to a parent
@@ -60,7 +71,6 @@ def save_join(green_obj, timeout=None):
6071
class LuaTest(Test):
6172
""" Handle *.test.lua and *.test.sql test files. """
6273

63-
TIMEOUT = 60 * 10
6474
RESULT_FILE_VERSION_INITIAL = 1
6575
RESULT_FILE_VERSION_DEFAULT = 2
6676
RESULT_FILE_VERSION_LINE_RE = re.compile(
@@ -378,7 +388,7 @@ def execute(self, server):
378388
lua.start()
379389
crash_occured = True
380390
try:
381-
crash_occured = save_join(lua, timeout=self.TIMEOUT)
391+
crash_occured = save_join(lua, timeout=Options().args.test_timeout)
382392
self.killall_servers(server, ts, crash_occured)
383393
except KeyboardInterrupt:
384394
# prevent tests greenlet from writing to the real stdout

0 commit comments

Comments
 (0)