Skip to content

Commit 0b12107

Browse files
authored
[3.5] bpo-29512, bpo-30764, bpo-30776: Backport regrtest enhancements from 3.6 to 3.5 (#2540)
* [3.6] bpo-29512, bpo-30776: Backport regrtest enhancements from master to 3.6 (#2513) * bpo-29512: Add test.bisect, bisect failing tests (#2452) Add a new "python3 -m test.bisect" tool to bisect failing tests. It can be used to find which test method(s) leak references, leak files, etc. (cherry picked from commit 84d9d14) * bpo-30776: regrtest: reduce memleak false positive (#2484) Only report a leak if each run leaks at least one memory block. (cherry picked from commit beeca6e) (cherry picked from commit a3ca94d) * bpo-30764: Fix regrtest --fail-env-changed --forever (#2536) (#2539) --forever now stops if a fail changes the environment. (cherry picked from commit 5e87592) (cherry picked from commit 4132adb)
1 parent 4685e56 commit 0b12107

File tree

2 files changed

+173
-10
lines changed

2 files changed

+173
-10
lines changed

Lib/test/bisect.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Command line tool to bisect failing CPython tests.
4+
5+
Find the test_os test method which alters the environment:
6+
7+
./python -m test.bisect --fail-env-changed test_os
8+
9+
Find a reference leak in "test_os", write the list of failing tests into the
10+
"bisect" file:
11+
12+
./python -m test.bisect -o bisect -R 3:3 test_os
13+
14+
Load an existing list of tests from a file using -i option:
15+
16+
./python -m test --list-cases -m FileTests test_os > tests
17+
./python -m test.bisect -i tests test_os
18+
"""
19+
20+
import argparse
21+
import datetime
22+
import os.path
23+
import math
24+
import random
25+
import subprocess
26+
import sys
27+
import tempfile
28+
import time
29+
30+
31+
def write_tests(filename, tests):
    """Write one test name per line into *filename*.

    The file is truncated first; an empty *tests* list produces an empty file.
    """
    with open(filename, "w") as fp:
        fp.writelines("%s\n" % name for name in tests)
        # Flush explicitly so the content is on disk even before close.
        fp.flush()
36+
37+
38+
def write_output(filename, tests):
    """Write the current list of tests into *filename*, if one was given.

    Return the filename when a file was written, None otherwise.
    """
    if not filename:
        return None
    print("Write %s tests into %s" % (len(tests), filename))
    write_tests(filename, tests)
    return filename
44+
45+
46+
def format_shell_args(args):
    """Render a command (list of strings) as a single printable line.

    Note: arguments are joined verbatim, without shell quoting — this is
    only used for display, not for executing a command.
    """
    return ' '.join(args)
48+
49+
50+
def list_cases(args):
    """Run "python -m test --list-cases" and return the test names as a list.

    Exit the process with the child's exit code if listing fails.
    """
    cmd = [sys.executable, '-m', 'test', '--list-cases']
    cmd.extend(args.test_args)
    proc = subprocess.run(cmd,
                          stdout=subprocess.PIPE,
                          universal_newlines=True)
    if proc.returncode != 0:
        cmd = format_shell_args(cmd)
        print("Failed to list tests: %s failed with exit code %s"
              % (cmd, proc.returncode))
        sys.exit(proc.returncode)
    return proc.stdout.splitlines()
64+
65+
66+
def run_tests(args, tests, huntrleaks=None):
    """Run the given list of tests via "python -m test --matchfile".

    Write *tests* into a temporary file passed with --matchfile, run the
    test suite with the extra user-supplied test arguments, and return the
    child process exit code.  The temporary file is always removed.

    *huntrleaks* is accepted for interface compatibility; it is not used
    here (leak options are passed through args.test_args instead).
    """
    # Bug fix: tempfile.mktemp() is deprecated and race-prone (another
    # process can create the file between name generation and our open).
    # Use mkstemp(), which atomically creates the file.
    fd, tmp = tempfile.mkstemp(prefix='bisect-', suffix='.txt')
    os.close(fd)
    try:
        write_tests(tmp, tests)

        cmd = [sys.executable, '-m', 'test', '--matchfile', tmp]
        cmd.extend(args.test_args)
        print("+ %s" % format_shell_args(cmd))
        proc = subprocess.run(cmd)
        return proc.returncode
    finally:
        if os.path.exists(tmp):
            os.unlink(tmp)
79+
80+
81+
def parse_args():
    """Parse known command line options.

    Any argument not recognized here is collected into the returned
    namespace's ``test_args`` attribute and later forwarded to
    "python -m test".
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input',
                        help='Test names produced by --list-tests written '
                             'into a file. If not set, run --list-tests')
    parser.add_argument('-o', '--output',
                        help='Result of the bisection')
    parser.add_argument('-n', '--max-tests', type=int, default=1,
                        help='Maximum number of tests to stop the bisection '
                             '(default: 1)')
    parser.add_argument('-N', '--max-iter', type=int, default=100,
                        help='Maximum number of bisection iterations '
                             '(default: 100)')
    # FIXME: document that following arguments are test arguments

    namespace, remainder = parser.parse_known_args()
    namespace.test_args = remainder
    return namespace
99+
100+
101+
def main():
    """Bisect failing tests.

    Repeatedly run a random half of the current test set: if the subset
    still fails, keep it; otherwise draw a new subset.  Stop when the set
    is small enough (-n/--max-tests) or after too many iterations
    (-N/--max-iter), then report the remaining tests.
    """
    args = parse_args()

    # Load the initial test list either from a file (-i) or by asking
    # regrtest to list the test cases.
    if args.input:
        with open(args.input) as fp:
            tests = [line.strip() for line in fp]
    else:
        tests = list_cases(args)

    print("Start bisection with %s tests" % len(tests))
    print("Test arguments: %s" % format_shell_args(args.test_args))
    print("Bisection will stop when getting %s or less tests "
          "(-n/--max-tests option), or after %s iterations "
          "(-N/--max-iter option)"
          % (args.max_tests, args.max_iter))
    output = write_output(args.output, tests)
    print()

    start_time = time.monotonic()
    iteration = 1
    try:
        while len(tests) > args.max_tests and iteration <= args.max_iter:
            # Try a random subset of half the remaining tests (at least 1).
            ntest = len(tests)
            ntest = max(ntest // 2, 1)
            subtests = random.sample(tests, ntest)

            print("[+] Iteration %s: run %s tests/%s"
                  % (iteration, len(subtests), len(tests)))
            print()

            exitcode = run_tests(args, subtests)

            print("ran %s tests/%s" % (ntest, len(tests)))
            print("exit", exitcode)
            if exitcode:
                # Failure reproduced: narrow the search to this subset
                # and keep the output file up to date.
                print("Tests failed: use this new subtest")
                tests = subtests
                output = write_output(args.output, tests)
            else:
                # Bug fix: message typo "subbset" -> "subset".
                print("Tests succeeded: skip this subtest, try a new subset")
            print()
            iteration += 1
    except KeyboardInterrupt:
        print()
        print("Bisection interrupted!")
        print()

    print("Tests (%s):" % len(tests))
    for test in tests:
        print("* %s" % test)
    print()

    if output:
        print("Output written into %s" % output)

    dt = math.ceil(time.monotonic() - start_time)
    if len(tests) <= args.max_tests:
        # NOTE: exit code 1 on a *completed* bisection appears intentional
        # (tests were narrowed down because they fail) — confirm before
        # changing.
        print("Bisection completed in %s iterations and %s"
              % (iteration, datetime.timedelta(seconds=dt)))
        sys.exit(1)
    else:
        print("Bisection failed after %s iterations and %s"
              % (iteration, datetime.timedelta(seconds=dt)))


if __name__ == "__main__":
    main()

Lib/test/regrtest.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,8 @@ def test_forever(tests=list(selected)):
739739
yield test
740740
if bad:
741741
return
742+
if ns.fail_env_changed and environment_changed:
743+
return
742744
tests = test_forever()
743745
test_count = ''
744746
test_count_width = 3
@@ -947,7 +949,7 @@ def runtest_accumulate():
947949
result = "FAILURE"
948950
elif interrupted:
949951
result = "INTERRUPTED"
950-
elif environment_changed and ns.fail_env_changed:
952+
elif ns.fail_env_changed and environment_changed:
951953
result = "ENV CHANGED"
952954
else:
953955
result = "SUCCESS"
@@ -1524,6 +1526,8 @@ def dash_R(the_module, test, indirect_test, huntrleaks):
15241526

15251527
# These checkers return False on success, True on failure
15261528
def check_rc_deltas(deltas):
1529+
# Checker for reference counters and memory blocks.
1530+
#
15271531
# bpo-30776: Try to ignore false positives:
15281532
#
15291533
# [3, 0, 0]
@@ -1536,18 +1540,10 @@ def check_rc_deltas(deltas):
15361540
# [10, 1, 1]
15371541
return all(delta >= 1 for delta in deltas)
15381542

1539-
def check_alloc_deltas(deltas):
1540-
# At least 1/3rd of 0s
1541-
if 3 * deltas.count(0) < len(deltas):
1542-
return True
1543-
# Nothing else than 1s, 0s and -1s
1544-
if not set(deltas) <= {1,0,-1}:
1545-
return True
1546-
return False
15471543
failed = False
15481544
for deltas, item_name, checker in [
15491545
(rc_deltas, 'references', check_rc_deltas),
1550-
(alloc_deltas, 'memory blocks', check_alloc_deltas)
1546+
(alloc_deltas, 'memory blocks', check_rc_deltas)
15511547
]:
15521548
# ignore warmup runs
15531549
deltas = deltas[nwarmup:]

0 commit comments

Comments
 (0)