Commit a87ba5c
Author: Yuanfang Chen
[lit] Use sharding for GoogleTest format
This helps lit unit test performance by a lot, especially on Windows. The
performance gain comes from launching one gtest executable for many subtests
instead of one per subtest (the current situation).

The shards are executed by the test runner and the results are stored in the
JSON format supported by GoogleTest. Later, in the test reporting stage, all
test results in the JSON files are retrieved to build the test summary etc.

On my Win10 desktop:
  `check-clang-unit`: 177s -> 37s
  `check-llvm-unit`:   38s -> 11s
On my Linux machine:
  `check-clang-unit`:  46s ->  7s
  `check-llvm-unit`:    8s ->  4s

Reviewed By: yln, rnk

Differential Revision: https://reviews.llvm.org/D122251
1 parent: f830392

13 files changed: +327 -224 lines changed
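For context before the diffs: the patch leans on GoogleTest's built-in sharding, which is driven entirely by environment variables. A minimal sketch of that protocol (not part of the commit; the binary name and file names are hypothetical):

import json
import os
import subprocess

def run_shard(executable, shard_idx, total_shards, json_file):
    # GoogleTest runs a deterministic 1/total_shards slice of the tests
    # when these two variables are set in the child environment.
    env = dict(os.environ)
    env['GTEST_TOTAL_SHARDS'] = str(total_shards)
    env['GTEST_SHARD_INDEX'] = str(shard_idx)
    # 'json:<path>' asks gtest to write machine-readable results there.
    env['GTEST_OUTPUT'] = 'json:' + json_file
    subprocess.run([executable], env=env)
    with open(json_file, encoding='utf-8') as f:
        return json.load(f)['testsuites']

# Hypothetical usage: run the first of four shards of one test binary.
# testsuites = run_shard('./SupportTests', 0, 4, 'SupportTests-0-4.json')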

llvm/unittests/Support/CrashRecoveryTest.cpp

Lines changed: 5 additions & 0 deletions
@@ -178,6 +178,11 @@ TEST(CrashRecoveryTest, UnixCRCReturnCode) {
   int Res = setenv("LLVM_CRC_UNIXCRCRETURNCODE", "1", 0);
   ASSERT_EQ(Res, 0);
 
+  Res = unsetenv("GTEST_SHARD_INDEX");
+  ASSERT_EQ(Res, 0);
+  Res = unsetenv("GTEST_TOTAL_SHARDS");
+  ASSERT_EQ(Res, 0);
+
   std::string Error;
   bool ExecutionFailed;
   int RetCode = ExecuteAndWait(Executable, argv, {}, {}, 0, 0, &Error,
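The unsetenv calls are presumably needed because lit now sets GTEST_SHARD_INDEX and GTEST_TOTAL_SHARDS for every gtest process: this test re-runs the test binary as a child via ExecuteAndWait, and an inherited sharding environment could make GoogleTest in the child skip the very test being exercised.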

llvm/unittests/Support/ProgramTest.cpp

Lines changed: 3 additions & 1 deletion
@@ -95,7 +95,9 @@ class ProgramEnvTest : public testing::Test {
     };
 
     while (*EnvP != nullptr) {
-      EnvTable.emplace_back(prepareEnvVar(*EnvP));
+      auto S = prepareEnvVar(*EnvP);
+      if (!StringRef(S).startswith("GTEST_"))
+        EnvTable.emplace_back(S);
       ++EnvP;
     }
   }
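Likewise, ProgramEnvTest snapshots the parent environment for the child processes it spawns; filtering out GTEST_-prefixed variables appears intended to keep lit's sharding and JSON-output settings from leaking into those children.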

llvm/utils/lit/lit/Test.py

Lines changed: 3 additions & 2 deletions
@@ -219,11 +219,12 @@ def getExecPath(self, components):
 class Test:
     """Test - Information on a single test instance."""
 
-    def __init__(self, suite, path_in_suite, config, file_path = None):
+    def __init__(self, suite, path_in_suite, config, file_path = None, gtest_json_file = None):
         self.suite = suite
         self.path_in_suite = path_in_suite
         self.config = config
         self.file_path = file_path
+        self.gtest_json_file = gtest_json_file
 
         # A list of conditions under which this test is expected to fail.
         # Each condition is a boolean expression of features and target
@@ -258,7 +259,7 @@ def __init__(self, suite, path_in_suite, config, file_path = None):
         # The previous test elapsed time, if applicable.
         self.previous_elapsed = 0.0
 
-        if '/'.join(path_in_suite) in suite.test_times:
+        if suite.test_times and '/'.join(path_in_suite) in suite.test_times:
             time = suite.test_times['/'.join(path_in_suite)]
             self.previous_elapsed = abs(time)
             self.previous_failure = time < 0

llvm/utils/lit/lit/TestingConfig.py

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ def fromdefaults(litConfig):
                      'TMPDIR', 'TMP', 'TEMP', 'TEMPDIR', 'AVRLIT_BOARD',
                      'AVRLIT_PORT', 'FILECHECK_OPTS', 'VCINSTALLDIR',
                      'VCToolsinstallDir', 'VSINSTALLDIR', 'WindowsSdkDir',
-                     'WindowsSDKLibVersion', 'SOURCE_DATE_EPOCH']
+                     'WindowsSDKLibVersion', 'SOURCE_DATE_EPOCH','GTEST_FILTER']
 
         if sys.platform == 'win32':
            pass_vars.append('COMSPEC')
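With shards no longer passing --gtest_filter on the command line (see googletest.py below), letting GTEST_FILTER through the environment is presumably what keeps user-specified test filtering working.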
llvm/utils/lit/lit/formats/googletest.py

Lines changed: 136 additions & 88 deletions
@@ -1,8 +1,8 @@
 from __future__ import absolute_import
+import json
+import math
 import os
-import re
 import shlex
-import subprocess
 import sys
 
 import lit.Test
@@ -25,74 +25,19 @@ def __init__(self, test_sub_dirs, test_suffix, run_under = []):
         self.test_suffixes = {exe_suffix, test_suffix + '.py'}
         self.run_under = run_under
 
-    def getGTestTests(self, path, litConfig, localConfig):
-        """getGTestTests(path) - [name]
-
-        Return the tests available in gtest executable.
-
-        Args:
-          path: String path to a gtest executable
-          litConfig: LitConfig instance
-          localConfig: TestingConfig instance"""
-
-        list_test_cmd = self.prepareCmd([path, '--gtest_list_tests'])
-
-        try:
-            output = subprocess.check_output(list_test_cmd,
-                                             env=localConfig.environment)
-        except subprocess.CalledProcessError as exc:
-            litConfig.warning(
-                "unable to discover google-tests in %r: %s. Process output: %s"
-                % (path, sys.exc_info()[1], exc.output))
-            # This doesn't look like a valid gtest file.  This can
-            # have a number of causes, none of them good.  For
-            # instance, we could have created a broken executable.
-            # Alternatively, someone has cruft in their test
-            # directory.  If we don't return a test here, then no
-            # failures will get reported, so return a dummy test name
-            # so that the failure is reported later.
-            yield 'failed_to_discover_tests_from_gtest'
-            return
-
-        upstream_prefix = re.compile('Running main\(\) from .*gtest_main\.cc')
-        nested_tests = []
-        for ln in output.splitlines(False):  # Don't keep newlines.
-            ln = lit.util.to_string(ln)
-
-            if upstream_prefix.fullmatch(ln):
-                # Upstream googletest prints this to stdout prior to running
-                # tests. LLVM removed that print statement in r61540, but we
-                # handle it here in case upstream googletest is being used.
-                continue
-
-            # The test name list includes trailing comments beginning with
-            # a '#' on some lines, so skip those. We don't support test names
-            # that use escaping to embed '#' into their name as the names come
-            # from C++ class and method names where such things are hard and
-            # uninteresting to support.
-            ln = ln.split('#', 1)[0].rstrip()
-            if not ln.lstrip():
-                continue
-
-            index = 0
-            while ln[index*2:index*2+2] == '  ':
-                index += 1
-            while len(nested_tests) > index:
-                nested_tests.pop()
-
-            ln = ln[index*2:]
-            if ln.endswith('.'):
-                nested_tests.append(ln)
-            elif any([name.startswith('DISABLED_')
-                      for name in nested_tests + [ln]]):
-                # Gtest will internally skip these tests. No need to launch a
-                # child process for it.
-                continue
-            else:
-                yield ''.join(nested_tests) + ln
+    def get_num_tests(self, path, localConfig):
+        cmd = [path, '--gtest_list_tests', '--gtest_filter=-*DISABLED_*']
+        if cmd[0].endswith('.py'):
+            cmd = [sys.executable] + cmd
+        out, _, exitCode = lit.util.executeCommand(cmd, env=localConfig.environment)
+        if exitCode == 0:
+            return sum(map(lambda line: line.startswith('  '), out.splitlines()))
+        return None
 
     def getTestsInDirectory(self, testSuite, path_in_suite,
                             litConfig, localConfig):
+        init_shard_size = 512 # number of tests in a shard
+        core_count = lit.util.usable_core_count()
         source_path = testSuite.getSourcePath(path_in_suite)
         for subdir in self.test_sub_dirs:
             dir_path = os.path.join(source_path, subdir)
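To make the shard-size loop in the next hunk concrete: starting from init_shard_size = 512, the shard size halves until there are at least as many shards as usable cores (or the size hits 1). A standalone rerun of the same arithmetic on hypothetical numbers:

import math

num_tests, core_count = 1000, 8   # hypothetical: 1000 tests, 8 cores
shard_size = 512                  # init_shard_size
nshard = int(math.ceil(num_tests/shard_size))   # 2 shards of <= 512 tests
while nshard < core_count and shard_size > 1:
    shard_size = shard_size//2
    nshard = int(math.ceil(num_tests/shard_size))
print(shard_size, nshard)         # -> 128 8: eight shards, <= 128 tests each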
@@ -102,52 +47,97 @@ def getTestsInDirectory(self, testSuite, path_in_suite,
                                               suffixes=self.test_suffixes):
                 # Discover the tests in this executable.
                 execpath = os.path.join(source_path, subdir, fn)
-                testnames = self.getGTestTests(execpath, litConfig, localConfig)
-                for testname in testnames:
-                    testPath = path_in_suite + (subdir, fn, testname)
-                    yield lit.Test.Test(testSuite, testPath, localConfig,
-                                        file_path=execpath)
+                num_tests = self.get_num_tests(execpath, localConfig)
+                if num_tests is not None:
+                    # Compute the number of shards.
+                    shard_size = init_shard_size
+                    nshard = int(math.ceil(num_tests/shard_size))
+                    while nshard < core_count and shard_size > 1:
+                        shard_size = shard_size//2
+                        nshard = int(math.ceil(num_tests/shard_size))
+
+                    # Create one lit test for each shard.
+                    for idx in range(nshard):
+                        testPath = path_in_suite + (subdir, fn,
+                                                    str(idx), str(nshard))
+                        json_file = '-'.join([execpath, testSuite.config.name,
+                                              str(os.getpid()), str(idx),
+                                              str(nshard)]) + '.json'
+                        yield lit.Test.Test(testSuite, testPath, localConfig,
+                                            file_path=execpath,
+                                            gtest_json_file=json_file)
+                else:
+                    # This doesn't look like a valid gtest file.  This can
+                    # have a number of causes, none of them good.  For
+                    # instance, we could have created a broken executable.
+                    # Alternatively, someone has cruft in their test
+                    # directory.  If we don't return a test here, then no
+                    # failures will get reported, so return a dummy test name
+                    # so that the failure is reported later.
+                    testPath = path_in_suite + (subdir, fn, 'failed_to_discover_tests_from_gtest')
+                    yield lit.Test.Test(testSuite, testPath, localConfig, file_path=execpath)
 
     def execute(self, test, litConfig):
+        if test.gtest_json_file is None:
+            return lit.Test.FAIL, ''
+
         testPath,testName = os.path.split(test.getSourcePath())
         while not os.path.exists(testPath):
             # Handle GTest parametrized and typed tests, whose name includes
             # some '/'s.
             testPath, namePrefix = os.path.split(testPath)
             testName = namePrefix + '/' + testName
 
-        cmd = [testPath, '--gtest_filter=' + testName]
+        testName,total_shards = os.path.split(testName)
+        testName,shard_idx = os.path.split(testName)
+        shard_env = {'GTEST_COLOR':'no','GTEST_TOTAL_SHARDS':total_shards, 'GTEST_SHARD_INDEX':shard_idx, 'GTEST_OUTPUT':'json:'+test.gtest_json_file}
+        test.config.environment.update(shard_env)
+
+        cmd = [testPath]
         cmd = self.prepareCmd(cmd)
         if litConfig.useValgrind:
             cmd = litConfig.valgrindArgs + cmd
 
         if litConfig.noExecute:
             return lit.Test.PASS, ''
 
-        header = f"Script:\n--\n{' '.join(cmd)}\n--\n"
+        shard_envs = '\n'.join([k + '=' + v for k, v in shard_env.items()])
+        shard_header = f"Script(shard):\n--\n{shard_envs}\n{' '.join(cmd)}\n--\n"
 
         try:
-            out, err, exitCode = lit.util.executeCommand(
+            _, _, exitCode = lit.util.executeCommand(
                 cmd, env=test.config.environment,
                 timeout=litConfig.maxIndividualTestTime)
         except lit.util.ExecuteCommandTimeoutException:
             return (lit.Test.TIMEOUT,
-                    f'{header}Reached timeout of '
+                    f'{shard_header}Reached timeout of '
                     f'{litConfig.maxIndividualTestTime} seconds')
 
-        if exitCode:
-            return lit.Test.FAIL, header + out + err
-
-        if '[  SKIPPED ] 1 test,' in out:
-            return lit.Test.SKIPPED, ''
-
-        passing_test_line = '[  PASSED  ] 1 test.'
-        if passing_test_line not in out:
-            return (lit.Test.UNRESOLVED,
-                    f'{header}Unable to find {passing_test_line} '
-                    f'in gtest output:\n\n{out}{err}')
+        if not os.path.exists(test.gtest_json_file):
+            errmsg = f"shard JSON output does not exist: %s" % (test.gtest_json_file)
+            return lit.Test.FAIL, shard_header + errmsg
 
-        return lit.Test.PASS,''
+        if exitCode:
+            output = shard_header + '\n'
+            with open(test.gtest_json_file, encoding='utf-8') as f:
+                testsuites = json.load(f)['testsuites']
+                for testcase in testsuites:
+                    for testinfo in testcase['testsuite']:
+                        if testinfo['result'] == 'SUPPRESSED' or testinfo['result'] == 'SKIPPED':
+                            continue
+                        testname = testcase['name'] + '.' + testinfo['name']
+                        header = f"Script:\n--\n{' '.join(cmd)} --gtest_filter={testname}\n--\n"
+                        if 'failures' in testinfo:
+                            output += header
+                            for fail in testinfo['failures']:
+                                output += fail['failure'] + '\n'
+                            output += '\n'
+                        elif testinfo['result'] != 'COMPLETED':
+                            output += header
+                            output += 'unresolved test result\n'
+            return lit.Test.FAIL, output
+        else:
+            return lit.Test.PASS, ''
 
     def prepareCmd(self, cmd):
         """Insert interpreter if needed.
@@ -166,3 +156,61 @@ def prepareCmd(self, cmd):
         else:
             cmd = shlex.split(self.run_under) + cmd
         return cmd
+
+    @staticmethod
+    def post_process_shard_results(selected_tests, discovered_tests):
+        def remove_gtest(tests):
+            idxs = []
+            for idx, t in enumerate(tests):
+                if t.gtest_json_file:
+                    idxs.append(idx)
+            for i in range(len(idxs)):
+                del tests[idxs[i]-i]
+
+        remove_gtest(discovered_tests)
+        gtests = [t for t in selected_tests if t.gtest_json_file]
+        remove_gtest(selected_tests)
+        for test in gtests:
+            # In case gtest has bugs such that no JSON file was emitted.
+            if not os.path.exists(test.gtest_json_file):
+                selected_tests.append(test)
+                discovered_tests.append(test)
+                continue
+
+            # Load json file to retrieve results.
+            with open(test.gtest_json_file, encoding='utf-8') as f:
+                testsuites = json.load(f)['testsuites']
+                for testcase in testsuites:
+                    for testinfo in testcase['testsuite']:
+                        # Ignore disabled tests.
+                        if testinfo['result'] == 'SUPPRESSED':
+                            continue
+
+                        testPath = test.path_in_suite[:-2] + (testcase['name'], testinfo['name'])
+                        subtest = lit.Test.Test(test.suite, testPath,
+                                                test.config, test.file_path)
+
+                        testname = testcase['name'] + '.' + testinfo['name']
+                        header = f"Script:\n--\n{test.file_path} --gtest_filter={testname}\n--\n"
+
+                        output = ''
+                        if testinfo['result'] == 'SKIPPED':
+                            returnCode = lit.Test.SKIPPED
+                        elif 'failures' in testinfo:
+                            returnCode = lit.Test.FAIL
+                            output = header
+                            for fail in testinfo['failures']:
+                                output += fail['failure'] + '\n'
+                        elif testinfo['result'] == 'COMPLETED':
+                            returnCode = lit.Test.PASS
+                        else:
+                            returnCode = lit.Test.UNRESOLVED
+                            output = header + 'unresolved test result\n'
+
+                        subtest.setResult(lit.Test.Result(returnCode, output, float(testinfo['time'][:-1])))
+
+                        selected_tests.append(subtest)
+                        discovered_tests.append(subtest)
+            os.remove(test.gtest_json_file)
+
+        return selected_tests, discovered_tests
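One subtlety in remove_gtest above: each del shifts the later elements left, so the i-th recorded index must be corrected by the i deletions already performed. A standalone illustration with a hypothetical list:

tests = ['a', 'shard0', 'b', 'shard1', 'c']   # hypothetical entries
idxs = [1, 3]                # positions of the gtest shard tests
for i in range(len(idxs)):
    del tests[idxs[i] - i]   # offset by the i deletions already done
print(tests)                 # -> ['a', 'b', 'c']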

llvm/utils/lit/lit/main.py

Lines changed: 4 additions & 0 deletions
@@ -18,6 +18,7 @@
 import lit.run
 import lit.Test
 import lit.util
+from lit.formats.googletest import GoogleTest
 from lit.TestTimes import record_test_times
 
 
@@ -108,6 +109,9 @@ def main(builtin_params={}):
 
     record_test_times(selected_tests, lit_config)
 
+    selected_tests, discovered_tests = GoogleTest.post_process_shard_results(
+        selected_tests, discovered_tests)
+
     if opts.time_tests:
         print_histogram(discovered_tests)
 
Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+import os
+import sys
+
+if len(sys.argv) == 3 and sys.argv[1] == "--gtest_list_tests":
+    if sys.argv[2] != '--gtest_filter=-*DISABLED_*':
+        raise ValueError("unexpected argument: %s" % (sys.argv[2]))
+    print("""\
+FirstTest.
+  subTestA
+  subTestB
+  subTestC
+  subTestD
+ParameterizedTest/0.
+  subTest
+ParameterizedTest/1.
+  subTest""")
+    sys.exit(0)
+elif len(sys.argv) != 1:
+    # sharding and json output are specified using environment variables
+    raise ValueError("unexpected argument: %r" % (' '.join(sys.argv[1:])))
+
+for e in ['GTEST_TOTAL_SHARDS', 'GTEST_SHARD_INDEX', 'GTEST_OUTPUT']:
+    if e not in os.environ:
+        raise ValueError("missing environment variables: " + e)
+
+if not os.environ['GTEST_OUTPUT'].startswith('json:'):
+    raise ValueError("must emit json output: " + os.environ['GTEST_OUTPUT'])
+
+dummy_output = """\
+{
+"testsuites": [
+]
+}"""
+
+if os.environ['GTEST_SHARD_INDEX'] == '0':
+    exit_code = 1
+else:
+    json_filename = os.environ['GTEST_OUTPUT'].split(':', 1)[1]
+    with open(json_filename, 'w') as f:
+        f.write(dummy_output)
+    exit_code = 0
+
+sys.exit(exit_code)
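This fixture stands in for a real gtest binary in lit's own tests: shard 0 exits with code 1 without writing the JSON file, which looks designed to exercise the 'shard JSON output does not exist' failure path in execute(), while every other shard writes an empty testsuites list and exits 0.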
