Skip to content

Commit 5bb3460

Browse files
authored
Timeout and Retry FTL failures and mark flakiness
1 parent d878458 commit 5bb3460

File tree

5 files changed

+170
-64
lines changed

5 files changed

+170
-64
lines changed

.github/workflows/integration_tests.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,7 @@ jobs:
840840
if: steps.get-device-type.outputs.device_type == 'real'
841841
uses: google-github-actions/setup-gcloud@master
842842
- name: Run Android integration tests on Real Device via FTL
843+
timeout-minutes: 60
843844
if: steps.get-device-type.outputs.device_type == 'real'
844845
run: |
845846
python scripts/gha/restore_secrets.py --passphrase "${{ secrets.TEST_SECRET }}"
@@ -934,6 +935,8 @@ jobs:
934935
if: steps.get-device-type.outputs.device_type == 'real'
935936
uses: google-github-actions/setup-gcloud@master
936937
- name: Run iOS integration tests on Real Device via FTL
938+
# max 3 retries and a 10m timeout per testapp, plus time for the other steps
939+
timeout-minutes: 60
937940
if: steps.get-device-type.outputs.device_type == 'real'
938941
run: |
939942
python scripts/gha/restore_secrets.py --passphrase "${{ secrets.TEST_SECRET }}"

scripts/gha/integration_testing/test_validation.py

Lines changed: 59 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ def summarize_test_results(tests, platform, summary_dir, file_name="summary.log"
150150
successes = []
151151
failures = []
152152
errors = []
153+
success_testapp_paths = set()
153154

154155
for test in tests:
155156
results = validate_results(test.logs, platform)
@@ -160,6 +161,7 @@ def summarize_test_results(tests, platform, summary_dir, file_name="summary.log"
160161
failures.append(test_result_pair)
161162
else:
162163
successes.append(test_result_pair)
164+
success_testapp_paths.add(test.testapp_path)
163165

164166
# First log the successes, then the failures and errors, then the summary.
165167
# This way, debugging will involve checking the summary at the bottom,
@@ -174,6 +176,11 @@ def summarize_test_results(tests, platform, summary_dir, file_name="summary.log"
174176
for test, _ in errors:
175177
logging.info("%s didn't finish normally.\n%s", test.testapp_path, test.logs)
176178

179+
# Testapps that failed at first but succeeded after a retry (max 3 retries).
180+
flaky_testapps = []
181+
failures_exclude_flakiness = []
182+
errors_exclude_flakiness = []
183+
177184
# The summary is much more terse, to minimize the time it takes to understand
178185
# what went wrong, without necessarily providing full debugging context.
179186
summary = []
@@ -184,7 +191,12 @@ def summarize_test_results(tests, platform, summary_dir, file_name="summary.log"
184191
if errors:
185192
summary.append("\n%d TESTAPPS EXPERIENCED ERRORS:" % len(errors))
186193
for test, results in errors:
187-
summary.append("%s:" % test.testapp_path)
194+
summary.append("\n%s:" % test.testapp_path)
195+
if test.testapp_path in success_testapp_paths:
196+
summary.append("THIS TESTAPP IS FLAKY")
197+
flaky_testapps.append((test, results))
198+
else:
199+
errors_exclude_flakiness.append((test, results))
188200
if hasattr(test, "ftl_link") and test.ftl_link:
189201
summary.append("ftl_link: %s" % test.ftl_link)
190202
if hasattr(test, "raw_result_link") and test.raw_result_link:
@@ -198,7 +210,12 @@ def summarize_test_results(tests, platform, summary_dir, file_name="summary.log"
198210
if failures:
199211
summary.append("\n%d TESTAPPS FAILED:" % len(failures))
200212
for test, results in failures:
201-
summary.append(test.testapp_path)
213+
summary.append("\n%s:" % test.testapp_path)
214+
if test.testapp_path in success_testapp_paths:
215+
summary.append("THIS TESTAPP IS FLAKY")
216+
flaky_testapps.append((test, results))
217+
else:
218+
failures_exclude_flakiness.append((test, results))
202219
if hasattr(test, "ftl_link") and test.ftl_link:
203220
summary.append("ftl_link: %s" % test.ftl_link)
204221
if hasattr(test, "raw_result_link") and test.raw_result_link:
@@ -208,44 +225,72 @@ def summarize_test_results(tests, platform, summary_dir, file_name="summary.log"
208225
"%d TESTAPPS TOTAL: %d PASSES, %d FAILURES, %d ERRORS"
209226
% (len(tests), len(successes), len(failures), len(errors)))
210227

228+
if len(flaky_testapps) > 0 and len(flaky_testapps) == len(failures) + len(errors):
229+
logging.info("All failures and errors are due to flakiness.")
230+
summary.append("ALL THE FOLLOWING FAILURES AND ERRORS ARE DUE TO FLAKINESS:(")
231+
211232
# summary_json format:
212233
# { "type": "test",
213234
# "testapps": [testapp],
214-
# "errors": {testapp:{"log": error_log, "ftl_link": ftl_link, "raw_result_link": raw_result_link}},
215-
# "failures": {testapp:{"log": error_log, "ftl_link": ftl_link, "raw_result_link": raw_result_link,
216-
# "failed_tests": {falied_test: test_log}}}}}
235+
# "errors": {testapp:{"logs": [error_log], "ftl_links": [ftl_link], "raw_result_links": [raw_result_link]}},
236+
# "failures": {testapp:{"logs": [error_log], "ftl_links": [ftl_link], "raw_result_links": [raw_result_link],
237+
# "failed_tests": {failed_test: test_log}}},
238+
# "flakiness": {testapp:{"logs": [error_log], "ftl_links": [ftl_link], "raw_result_links": [raw_result_link],
239+
# "flaky_tests": {flaky_test: test_log}}}}
217240
summary_json = {}
218241
summary_json["type"] = "test"
219242
summary_json["testapps"] = [get_name(test.testapp_path) for test in tests]
220-
summary_json["errors"] = {get_name(test.testapp_path):{"logs": results.summary} for (test, results) in errors}
221-
for (test, results) in errors:
243+
summary_json["errors"] = {get_name(test.testapp_path):{"logs": [], "ftl_links": [], "raw_result_links": []} for (test, _) in errors_exclude_flakiness}
244+
for (test, results) in errors_exclude_flakiness:
222245
testapp = get_name(test.testapp_path)
246+
summary_json["errors"][testapp]["logs"].append(results.summary)
223247
if hasattr(test, "ftl_link") and test.ftl_link:
224-
summary_json["errors"][testapp]["ftl_link"] = test.ftl_link
248+
summary_json["errors"][testapp]["ftl_links"].append(test.ftl_link)
225249
if hasattr(test, "raw_result_link") and test.raw_result_link:
226-
summary_json["errors"][testapp]["raw_result_link"] = test.raw_result_link
227-
summary_json["failures"] = {get_name(test.testapp_path):{"logs": results.summary, "failed_tests": dict()} for (test, results) in failures}
228-
for (test, results) in failures:
250+
summary_json["errors"][testapp]["raw_result_links"].append(test.raw_result_link)
251+
summary_json["failures"] = {get_name(test.testapp_path):{"logs": [], "ftl_links": [], "raw_result_links": [], "failed_tests": dict()} for (test, _) in failures_exclude_flakiness}
252+
for (test, results) in failures_exclude_flakiness:
229253
testapp = get_name(test.testapp_path)
254+
summary_json["failures"][testapp]["logs"].append(results.summary)
230255
if hasattr(test, "ftl_link") and test.ftl_link:
231-
summary_json["failures"][testapp]["ftl_link"] = test.ftl_link
256+
summary_json["failures"][testapp]["ftl_links"].append(test.ftl_link)
232257
if hasattr(test, "raw_result_link") and test.raw_result_link:
233-
summary_json["failures"][testapp]["raw_result_link"] = test.raw_result_link
258+
summary_json["failures"][testapp]["raw_result_links"].append(test.raw_result_link)
234259
failed_tests = re.findall(r"\[ FAILED \] (.+)[.](.+)", results.summary)
235260
for failed_test in failed_tests:
236261
failed_test = failed_test[0] + "." + failed_test[1]
237262
pattern = fr'\[ RUN \] {failed_test}(.*?)\[ FAILED \] {failed_test}'
238263
failure_log = re.search(pattern, test.logs, re.MULTILINE | re.DOTALL)
239264
summary_json["failures"][testapp]["failed_tests"][failed_test] = failure_log.group()
240265
summary.append("\n%s FAILED:\n%s\n" % (failed_test, failure_log.group()))
266+
summary_json["flakiness"] = {get_name(test.testapp_path):{"logs": [], "ftl_links": [], "raw_result_links": [], "flaky_tests": dict()} for (test, _) in flaky_testapps}
267+
for (test, results) in flaky_testapps:
268+
testapp = get_name(test.testapp_path)
269+
summary_json["flakiness"][testapp]["logs"].append(results.summary)
270+
if hasattr(test, "ftl_link") and test.ftl_link:
271+
summary_json["flakiness"][testapp]["ftl_links"].append(test.ftl_link)
272+
if hasattr(test, "raw_result_link") and test.raw_result_link:
273+
summary_json["flakiness"][testapp]["raw_result_links"].append(test.raw_result_link)
274+
flaky_tests = re.findall(r"\[ FAILED \] (.+)[.](.+)", results.summary)
275+
for flaky_test in flaky_tests:
276+
flaky_test = flaky_test[0] + "." + flaky_test[1]
277+
pattern = fr'\[ RUN \] {flaky_test}(.*?)\[ FAILED \] {flaky_test}'
278+
failure_log = re.search(pattern, test.logs, re.MULTILINE | re.DOTALL)
279+
if failure_log:
280+
summary_json["flakiness"][testapp]["flaky_tests"][flaky_test] = failure_log.group()
281+
summary.append("\n%s FAILED:\n%s\n" % (flaky_test, failure_log.group()))
241282

242283
with open(os.path.join(summary_dir, file_name+".json"), "a") as f:
243284
f.write(json.dumps(summary_json, indent=2))
244285

245286
summary = "\n".join(summary)
246287
write_summary(summary_dir, summary, file_name)
247288

248-
return 0 if len(tests) == len(successes) else 1
289+
# success or only flakiness
290+
if len(tests) == len(successes) or len(flaky_testapps) == len(failures) + len(errors):
291+
return 0
292+
else:
293+
return 1
249294

250295

251296
def write_summary(testapp_dir, summary, file_name="summary.log"):

scripts/gha/it_workflow.py

Lines changed: 43 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,17 @@
6060
_LABEL_SUCCEED = "tests: succeeded"
6161

6262
_COMMENT_TITLE_PROGESS = "### ⏳  Integration test in progress...\n"
63+
_COMMENT_TITLE_PROGESS_FLAKY = "### Integration test with FLAKINESS (but still ⏳  in progress)\n"
6364
_COMMENT_TITLE_PROGESS_FAIL = "### ❌  Integration test FAILED (but still ⏳  in progress)\n"
65+
_COMMENT_TITLE_FLAKY = "### Integration test with FLAKINESS (succeeded after retry)\n"
6466
_COMMENT_TITLE_FAIL = "### ❌  Integration test FAILED\n"
6567
_COMMENT_TITLE_SUCCEED = "### ✅  Integration test succeeded!\n"
66-
_COMMENT_TITLE_FAIL_SDK = "\n***\n### ❌  Integration test FAILED (build against SDK)\n"
67-
_COMMENT_TITLE_SUCCEED_SDK = "\n***\n### ✅  Integration test succeeded! (build against SDK)\n"
68-
_COMMENT_TITLE_FAIL_REPO = "### ❌  Integration test FAILED (build against repo)\n"
69-
_COMMENT_TITLE_SUCCEED_REPO = "### ✅  Integration test succeeded! (build against repo)\n"
68+
_COMMENT_TITLE_FLAKY_SDK = "\n***\n### [build against SDK] Integration test with FLAKINESS (succeeded after retry)\n"
69+
_COMMENT_TITLE_FAIL_SDK = "\n***\n### ❌  [build against SDK] Integration test FAILED\n"
70+
_COMMENT_TITLE_SUCCEED_SDK = "\n***\n### ✅  [build against SDK] Integration test succeeded!\n"
71+
_COMMENT_TITLE_FLAKY_REPO = "### [build against repo] Integration test with FLAKINESS (succeeded after retry)\n"
72+
_COMMENT_TITLE_FAIL_REPO = "### ❌  [build against repo] Integration test FAILED\n"
73+
_COMMENT_TITLE_SUCCEED_REPO = "### ✅  [build against repo] Integration test succeeded!\n"
7074

7175
_COMMENT_FLAKY_TRACKER = "\n\nAdd flaky tests to **[go/fpl-cpp-flake-tracker](http://go/fpl-cpp-flake-tracker)**\n"
7276

@@ -136,12 +140,19 @@ def test_start(token, issue_number, actor, commit, run_id):
136140
def test_progress(token, issue_number, actor, commit, run_id):
137141
"""In PR, when some test failed, update failure info and
138142
add label \"tests: failed\""""
139-
log_summary = _get_summary_talbe(token, run_id)
140-
if log_summary == 0:
143+
success_or_only_flakiness, log_summary = _get_summary_table(token, run_id)
144+
if success_or_only_flakiness and not log_summary:
145+
# succeeded (without flakiness)
141146
return
142147
else:
143-
github.add_label(token, issue_number, _LABEL_FAILED)
144-
comment = (_COMMENT_TITLE_PROGESS_FAIL +
148+
if success_or_only_flakiness:
149+
# all failures/errors are due to flakiness (succeeded after retry)
150+
title = _COMMENT_TITLE_PROGESS_FLAKY
151+
else:
152+
# failures/errors still exist after retry
153+
title = _COMMENT_TITLE_PROGESS_FAIL
154+
github.add_label(token, issue_number, _LABEL_FAILED)
155+
comment = (title +
145156
_get_description(actor, commit, run_id) +
146157
log_summary +
147158
_COMMENT_FLAKY_TRACKER +
@@ -153,16 +164,24 @@ def test_end(token, issue_number, actor, commit, run_id, new_token):
153164
"""In PR, when some test end, update Test Result Report and
154165
update label: add \"tests: failed\" if test failed, add label
155166
\"tests: succeeded\" if test succeed"""
156-
log_summary = _get_summary_talbe(token, run_id)
157-
if log_summary == 0:
167+
success_or_only_flakiness, log_summary = _get_summary_table(token, run_id)
168+
if success_or_only_flakiness and not log_summary:
169+
# succeeded (without flakiness)
158170
github.add_label(token, issue_number, _LABEL_SUCCEED)
159171
comment = (_COMMENT_TITLE_SUCCEED +
160172
_get_description(actor, commit, run_id) +
161173
_COMMENT_SUFFIX)
162174
_update_comment(token, issue_number, comment)
163175
else:
164-
github.add_label(token, issue_number, _LABEL_FAILED)
165-
comment = (_COMMENT_TITLE_FAIL +
176+
if success_or_only_flakiness:
177+
# all failures/errors are due to flakiness (succeeded after retry)
178+
title = _COMMENT_TITLE_FLAKY
179+
github.add_label(token, issue_number, _LABEL_SUCCEED)
180+
else:
181+
# failures/errors still exist after retry
182+
title = _COMMENT_TITLE_FAIL
183+
github.add_label(token, issue_number, _LABEL_FAILED)
184+
comment = (title +
166185
_get_description(actor, commit, run_id) +
167186
log_summary +
168187
_COMMENT_FLAKY_TRACKER +
@@ -180,12 +199,18 @@ def test_report(token, actor, commit, run_id, build_against):
180199
issue_number = _get_issue_number(token, _REPORT_TITLE, _REPORT_LABEL)
181200
previous_comment = github.get_issue_body(token, issue_number)
182201
[previous_comment_repo, previous_comment_sdk] = previous_comment.split(_COMMENT_SUFFIX)
183-
log_summary = _get_summary_talbe(token, run_id)
184-
if log_summary == 0:
202+
success_or_only_flakiness, log_summary = _get_summary_table(token, run_id)
203+
if success_or_only_flakiness and not log_summary:
204+
# succeeded (without flakiness)
185205
title = _COMMENT_TITLE_SUCCEED_REPO if build_against==_BUILD_AGAINST_REPO else _COMMENT_TITLE_SUCCEED_SDK
186206
comment = title + _get_description(actor, commit, run_id)
187207
else:
188-
title = _COMMENT_TITLE_FAIL_REPO if build_against==_BUILD_AGAINST_REPO else _COMMENT_TITLE_FAIL_SDK
208+
if success_or_only_flakiness:
209+
# all failures/errors are due to flakiness (succeeded after retry)
210+
title = _COMMENT_TITLE_FLAKY_REPO if build_against==_BUILD_AGAINST_REPO else _COMMENT_TITLE_FLAKY_SDK
211+
else:
212+
# failures/errors still exist after retry
213+
title = _COMMENT_TITLE_FAIL_REPO if build_against==_BUILD_AGAINST_REPO else _COMMENT_TITLE_FAIL_SDK
189214
comment = title + _get_description(actor, commit, run_id) + log_summary + _COMMENT_FLAKY_TRACKER
190215

191216
if build_against==_BUILD_AGAINST_REPO:
@@ -239,16 +264,10 @@ def _get_datetime():
239264
return pst_now.strftime("%a %b %e %H:%M %Z %G")
240265

241266

242-
def _get_summary_talbe(token, run_id):
267+
def _get_summary_table(token, run_id):
243268
"""Test Result Report Body, which is failed test table with markdown format"""
244-
# artifact_id only exist after workflow finishs running
245-
# Thus, "down artifact" logic is in the workflow
246-
# artifact_id = _get_artifact_id(token, run_id, _LOG_ARTIFACT_NAME)
247-
# artifact_path = _LOG_ARTIFACT_NAME + ".zip"
248-
# github.download_artifact(token, artifact_id, artifact_path)
249-
# shutil.unpack_archive(artifact_path, _LOG_OUTPUT_DIR)
250-
summary_talbe = summarize.summarize_logs(dir=_LOG_OUTPUT_DIR, markdown=True)
251-
return summary_talbe
269+
# return (success_or_only_flakiness, failed_test_summary_table)
270+
return summarize.summarize_logs(dir=_LOG_OUTPUT_DIR, markdown=True)
252271

253272

254273
def _get_artifact_id(token, run_id, name):

scripts/gha/summarize_test_results.py

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -200,50 +200,57 @@ def summarize_logs(dir, markdown=False, github_log=False):
200200
test_log_name_re = re.escape(
201201
os.path.join(dir,TEST_FILE_PATTERN)).replace("\\*", "(.*)")
202202

203-
any_failures = False
203+
success_or_only_flakiness = True
204204
log_data = {}
205205
# log_data format:
206206
# { testapps: {"build": [configs]},
207207
# {"test": {"errors": [configs]},
208-
# {"failures": {failed_test: [configs]}}}}
208+
# {"failures": {failed_test: [configs]}},
209+
# {"flakiness": {flaky_test: [configs]}}}}
209210
for build_log_file in build_log_files:
210211
configs = get_configs_from_file_name(build_log_file, build_log_name_re)
211212
with open(build_log_file, "r") as log_reader:
212213
log_text = log_reader.read()
213214
if "__SUMMARY_MISSING__" in log_text:
214-
any_failures = True
215+
success_or_only_flakiness = False
215216
log_data.setdefault(MISSING_LOG, {}).setdefault("build", []).append(configs)
216217
else:
217218
log_reader_data = json.loads(log_text)
218-
for (testapp, error) in log_reader_data["errors"].items():
219-
any_failures = True
219+
for (testapp, _) in log_reader_data["errors"].items():
220+
success_or_only_flakiness = False
220221
log_data.setdefault(testapp, {}).setdefault("build", []).append(configs)
221222

222223
for test_log_file in test_log_files:
223224
configs = get_configs_from_file_name(test_log_file, test_log_name_re)
224225
with open(test_log_file, "r") as log_reader:
225226
log_text = log_reader.read()
226227
if "__SUMMARY_MISSING__" in log_text:
227-
any_failures = True
228+
success_or_only_flakiness = False
228229
log_data.setdefault(MISSING_LOG, {}).setdefault("test", {}).setdefault("errors", []).append(configs)
229230
else:
230231
log_reader_data = json.loads(log_text)
231-
for (testapp, error) in log_reader_data["errors"].items():
232-
any_failures = True
232+
for (testapp, _) in log_reader_data["errors"].items():
233+
success_or_only_flakiness = False
233234
log_data.setdefault(testapp, {}).setdefault("test", {}).setdefault("errors", []).append(configs)
234235
for (testapp, failures) in log_reader_data["failures"].items():
235-
for (test, failure) in failures["failed_tests"].items():
236-
any_failures = True
236+
for (test, _) in failures["failed_tests"].items():
237+
success_or_only_flakiness = False
237238
log_data.setdefault(testapp, {}).setdefault("test", {}).setdefault("failures", {}).setdefault(test, []).append(configs)
238-
239+
for (testapp, flakiness) in log_reader_data["flakiness"].items():
240+
if flakiness["flaky_tests"].items():
241+
for (test, _) in flakiness["flaky_tests"].items():
242+
log_data.setdefault(testapp, {}).setdefault("test", {}).setdefault("flakiness", {}).setdefault(test, []).append(configs)
243+
else:
244+
log_data.setdefault(testapp, {}).setdefault("test", {}).setdefault("flakiness", {}).setdefault("CRASH/TIMEOUT", []).append(configs)
245+
246+
if success_or_only_flakiness and not log_data:
247+
# No failures and no flakiness occurred, nothing to log.
248+
return (success_or_only_flakiness, None)
249+
250+
# if failures (include flakiness) exist:
239251
# log_results format:
240252
# { testapps: {configs: [failed tests]} }
241253
log_results = reorganize_log(log_data)
242-
243-
if not any_failures:
244-
# No failures occurred, nothing to log.
245-
return(0)
246-
247254
log_lines = []
248255
if markdown:
249256
log_lines = print_markdown_table(log_results)
@@ -255,7 +262,7 @@ def summarize_logs(dir, markdown=False, github_log=False):
255262

256263
log_summary = "\n".join(log_lines)
257264
print(log_summary)
258-
return log_summary
265+
return (success_or_only_flakiness, log_summary)
259266

260267

261268
def get_configs_from_file_name(file_name, file_name_re):
@@ -293,6 +300,13 @@ def reorganize_log(log_data):
293300
all_configs = [["TEST"], ["FAILURE"], [CAPITALIZATIONS[platform]]]
294301
all_configs.extend(config)
295302
log_results.setdefault(testapp, {}).setdefault(flat_config(all_configs), []).append(test)
303+
for (test, configs) in errors.get("test",{}).get("flakiness",{}).items():
304+
combined_configs = combine_configs(configs)
305+
for (platform, configs) in combined_configs.items():
306+
for config in configs:
307+
all_configs = [["TEST"], ["FLAKINESS"], [CAPITALIZATIONS[platform]]]
308+
all_configs.extend(config)
309+
log_results.setdefault(testapp, {}).setdefault(flat_config(all_configs), []).append(test)
296310

297311
return log_results
298312

0 commit comments

Comments
 (0)