Speed up testpythoneval (#2635)

ambv · gvanrossum · commit 996e3e81adfe · 2017-01-04T20:34:57.000-08:00
Splits eval-test into simple buckets by first letter of test name, enabling
parallel execution. This speeds up execution of the test suite by around 25% on
my laptop. The split enables more consistent loading of all CPU cores during
the entire run of ./runtests.py.

To achieve this, I had to modify testpythoneval.py to not write all testcase
inputs to the same temporary path.

Before:

  SUMMARY  all 204 tasks and 1811 tests passed
  *** OK ***
  total time in run: 554.571954
  total time in check: 214.105742
  total time in lint: 130.914682
  total time in pytest: 92.031659
  ./runtests.py -j4 -v  744.76s user 74.10s system 235% cpu 5:48.34 total

After:

  SUMMARY  all 225 tasks and 3823 tests passed
  *** OK ***
  total time in run: 640.698327
  total time in check: 178.758370
  total time in lint: 149.604402
  total time in pytest: 78.356671
  ./runtests.py -j4 -v  850.81s user 81.09s system 353% cpu 4:23.69 total

Total wall clock time fell from 5:48 to 4:23.

Note: the total test count is now over-reported (3823 vs. 1811 above). It looks
like the driver also counts the tests that are filtered out of each eval-test bucket. I don't have cycles right now to hunt this down.
diff --git a/mypy/test/testpythoneval.py b/mypy/test/testpythoneval.py
@@ -10,16 +10,20 @@
       this suite would slow down the main suite too much.
 """
 
+from contextlib import contextmanager
+import errno
 import os
 import os.path
+import re
 import subprocess
 import sys
 
 import typing
+from typing import Dict, List, Tuple
 
 from mypy.myunit import Suite, SkipTestCaseException
 from mypy.test.config import test_data_prefix, test_temp_dir
-from mypy.test.data import parse_test_cases
+from mypy.test.data import DataDrivenTestCase, parse_test_cases
 from mypy.test.helpers import assert_string_arrays_equal
 from mypy.util import try_find_python2_interpreter
 
@@ -33,6 +37,7 @@
 
 # Path to Python 3 interpreter
 python3_path = sys.executable
+program_re = re.compile(r'\b_program\.py\b')  # literal dot: match only the generic file name
 
 
 class PythonEvaluationSuite(Suite):
@@ -48,56 +53,83 @@ def cases(self):
         return c
 
 
-def test_python_evaluation(testcase):
-    python2_interpreter = try_find_python2_interpreter()
-    # Use Python 2 interpreter if running a Python 2 test case.
-    if testcase.name.lower().endswith('python2'):
-        if not python2_interpreter:
+def test_python_evaluation(testcase: DataDrivenTestCase) -> None:
+    """Type check the test case's program by running mypy in a subprocess.
+
+    If mypy exits with status 0, the program is also run with the selected
+    interpreter; the combined output must match the adapted expected output.
+    """
+    mypy_cmdline = [
+        python3_path,
+        os.path.join(testcase.old_cwd, 'scripts', 'mypy'),
+        '--show-traceback',
+    ]
+    py2 = testcase.name.lower().endswith('python2')  # Python 2 cases are marked by name
+    if py2:
+        mypy_cmdline.append('--py2')
+        interpreter = try_find_python2_interpreter()
+        if not interpreter:
             # Skip, can't find a Python 2 interpreter.
             raise SkipTestCaseException()
-        interpreter = python2_interpreter
-        args = ['--py2']
-        py2 = True
     else:
         interpreter = python3_path
-        args = []
-        py2 = False
-    args.append('--show-traceback')
+
     # Write the program to a file.
-    program = '_program.py'
+    program = '_' + testcase.name + '.py'  # per-test file name instead of a shared path
+    mypy_cmdline.append(program)
     program_path = os.path.join(test_temp_dir, program)
     with open(program_path, 'w') as file:
         for s in testcase.input:
             file.write('{}\n'.format(s))
     # Type check the program.
     # This uses the same PYTHONPATH as the current process.
-    process = subprocess.Popen([python3_path,
-                                os.path.join(testcase.old_cwd, 'scripts', 'mypy')]
-                            + args + [program],
-                               stdout=subprocess.PIPE,
-                               stderr=subprocess.STDOUT,
-                               cwd=test_temp_dir)
-    outb = process.stdout.read()
-    # Split output into lines.
-    out = [s.rstrip('\n\r') for s in str(outb, 'utf8').splitlines()]
-    if not process.wait():
+    returncode, out = run(mypy_cmdline)
+    if returncode == 0:
         # Set up module path for the execution.
         # This needs the typing module but *not* the mypy module.
         vers_dir = '2.7' if py2 else '3.2'
         typing_path = os.path.join(testcase.old_cwd, 'lib-typing', vers_dir)
         assert os.path.isdir(typing_path)
         env = os.environ.copy()
         env['PYTHONPATH'] = typing_path
-        process = subprocess.Popen([interpreter, program],
-                                   stdout=subprocess.PIPE,
-                                   stderr=subprocess.STDOUT,
-                                   cwd=test_temp_dir,
-                                   env=env)
-        outb = process.stdout.read()
-        # Split output into lines.
-        out += [s.rstrip('\n\r') for s in str(outb, 'utf8').splitlines()]
+        returncode, interp_out = run([interpreter, program], env=env)
+        out += interp_out  # interpreter output follows mypy's output
     # Remove temp file.
     os.remove(program_path)
-    assert_string_arrays_equal(testcase.output, out,
+    assert_string_arrays_equal(adapt_output(testcase), out,
                                'Invalid output ({}, line {})'.format(
                                    testcase.file, testcase.line))
+
+
+def split_lines(*streams: bytes) -> List[str]:
+    """Decodes each byte stream as UTF-8 and returns all their lines in order."""
+    lines = []  # type: List[str]
+    for raw in streams:
+        # splitlines() already leaves the '\n' and '\r' terminators out.
+        lines.extend(raw.decode('utf8').splitlines())
+    return lines
+
+
+def adapt_output(testcase: DataDrivenTestCase) -> List[str]:
+    """Returns the expected output with _program.py renamed to this case's file."""
+    actual_name = '_{}.py'.format(testcase.name)
+    return [program_re.sub(actual_name, expected) for expected in testcase.output]
+
+
+def run(
+    cmdline: List[str], *, env: Dict[str, str] = None, timeout: int = 30
+) -> Tuple[int, List[str]]:
+    """Poor man's subprocess.run() for 3.3/3.4: returns (status, output lines)."""
+    process = subprocess.Popen(
+        cmdline,
+        env=env,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        cwd=test_temp_dir,
+    )
+    try:
+        out, err = process.communicate(timeout=timeout)
+    except subprocess.TimeoutExpired:
+        process.kill()  # communicate() does not kill the child on timeout
+        out, err = process.communicate()  # reap it so returncode is an int
+    return process.returncode, split_lines(out, err)  # stdout lines, then stderr
diff --git a/runtests.py b/runtests.py
@@ -28,9 +28,12 @@ def get_versions():  # type: () -> typing.List[str]
 
 from mypy.waiter import Waiter, LazySubprocess
 from mypy import util
+from mypy.test.config import test_data_prefix
+from mypy.test.testpythoneval import python_eval_files, python_34_eval_files
 
 import itertools
 import os
+import re
 
 
 # Ideally, all tests would be `discover`able so that they can be driven
@@ -233,9 +236,27 @@ def add_myunit(driver: Driver) -> None:
 
 
 def add_pythoneval(driver: Driver) -> None:
-    driver.add_python_mod('eval-test', 'mypy.myunit',
-                          '-m', 'mypy.test.testpythoneval', *driver.arglist,
-                         coverage=True)
+    cases = set()  # distinct first letters that follow 'test' in the case names
+    case_re = re.compile(r'^\[case ([^\]]+)\]$')
+    for file in python_eval_files + python_34_eval_files:
+        with open(os.path.join(test_data_prefix, file), 'r') as f:
+            for line in f:
+                m = case_re.match(line)
+                if m:
+                    case_name = m.group(1)
+                    assert case_name[:4] == 'test'
+                    cases.add(case_name[4:5])
+    # Register one eval-test task per letter, selecting its cases by name pattern.
+    for prefix in sorted(cases):
+        driver.add_python_mod(
+            'eval-test-' + prefix,
+            'mypy.myunit',
+            '-m',
+            'mypy.test.testpythoneval',
+            'test_testpythoneval_PythonEvaluationSuite.test' + prefix + '*',
+            *driver.arglist,
+            coverage=True
+        )
 
 
 def add_cmdline(driver: Driver) -> None: