bpo-20891: Fix PyGILState_Ensure() (#4650) (#4655)

vstinner · web-flow · commit e10c9de9d74f · 2017-11-30T23:36:49.000+01:00
When PyGILState_Ensure() is called in a non-Python thread before PyEval_InitThreads(), only call PyEval_InitThreads() after calling PyThreadState_New() to fix a crash. Add an unit test in test_embed. Enhance also embedded tests, backport from master: * Add test_pre_initialization_api() * Set PYTHONIOENCODING environment variable in test_forced_io_encoding() (cherry picked from commit b4d1e1f)
diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py
@@ -401,23 +401,30 @@ def setUp(self):
     def tearDown(self):
         os.chdir(self.oldcwd)
 
-    def run_embedded_interpreter(self, *args):
+    def run_embedded_interpreter(self, *args, env=None):
         """Runs a test in the embedded interpreter"""
         cmd = [self.test_exe]
         cmd.extend(args)
+        if env is not None and sys.platform == 'win32':
+            # Windows requires at least the SYSTEMROOT environment variable to
+            # start Python.
+            env = env.copy()
+            env['SYSTEMROOT'] = os.environ['SYSTEMROOT']
+
         p = subprocess.Popen(cmd,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
-                             universal_newlines=True)
+                             universal_newlines=True,
+                             env=env)
         (out, err) = p.communicate()
         self.assertEqual(p.returncode, 0,
                          "bad returncode %d, stderr is %r" %
                          (p.returncode, err))
         return out, err
 
-    def test_subinterps(self):
+    def test_repeated_init_and_subinterpreters(self):
         # This is just a "don't crash" test
-        out, err = self.run_embedded_interpreter()
+        out, err = self.run_embedded_interpreter('repeated_init_and_subinterpreters')
         if support.verbose:
             print()
             print(out)
@@ -435,13 +442,14 @@ def _get_default_pipe_encoding():
 
     def test_forced_io_encoding(self):
         # Checks forced configuration of embedded interpreter IO streams
-        out, err = self.run_embedded_interpreter("forced_io_encoding")
+        env = dict(os.environ, PYTHONIOENCODING="utf-8:surrogateescape")
+        out, err = self.run_embedded_interpreter("forced_io_encoding", env=env)
         if support.verbose:
             print()
             print(out)
             print(err)
-        expected_errors = sys.__stdout__.errors
-        expected_stdin_encoding = sys.__stdin__.encoding
+        expected_stream_encoding = "utf-8"
+        expected_errors = "surrogateescape"
         expected_pipe_encoding = self._get_default_pipe_encoding()
         expected_output = '\n'.join([
         "--- Use defaults ---",
@@ -469,13 +477,33 @@ def test_forced_io_encoding(self):
         "stdout: latin-1:replace",
         "stderr: latin-1:backslashreplace"])
         expected_output = expected_output.format(
-                                in_encoding=expected_stdin_encoding,
-                                out_encoding=expected_pipe_encoding,
+                                in_encoding=expected_stream_encoding,
+                                out_encoding=expected_stream_encoding,
                                 errors=expected_errors)
         # This is useful if we ever trip over odd platform behaviour
         self.maxDiff = None
         self.assertEqual(out.strip(), expected_output)
 
+    def test_pre_initialization_api(self):
+        """
+        Checks the few parts of the C-API that work before the runtine
+        is initialized (via Py_Initialize()).
+        """
+        env = dict(os.environ, PYTHONPATH=os.pathsep.join(sys.path))
+        out, err = self.run_embedded_interpreter("pre_initialization_api", env=env)
+        self.assertEqual(out, '')
+        self.assertEqual(err, '')
+
+    def test_bpo20891(self):
+        """
+        bpo-20891: Calling PyGILState_Ensure in a non-Python thread before
+        calling PyEval_InitThreads() must not crash. PyGILState_Ensure() must
+        call PyEval_InitThreads() for us in this case.
+        """
+        out, err = self.run_embedded_interpreter("bpo20891")
+        self.assertEqual(out, '')
+        self.assertEqual(err, '')
+
 
 class SkipitemTest(unittest.TestCase):
 
diff --git a/Misc/NEWS.d/next/C API/2017-11-30-18-13-45.bpo-20891.wBnMdF.rst b/Misc/NEWS.d/next/C API/2017-11-30-18-13-45.bpo-20891.wBnMdF.rst
@@ -0,0 +1,3 @@
+Fix PyGILState_Ensure(). When PyGILState_Ensure() is called in a non-Python
+thread before PyEval_InitThreads(), only call PyEval_InitThreads() after
+calling PyThreadState_New() to fix a crash.
diff --git a/Programs/_testembed.c b/Programs/_testembed.c
@@ -1,4 +1,5 @@
 #include <Python.h>
+#include "pythread.h"
 #include <stdio.h>
 
 /*********************************************************
@@ -33,7 +34,7 @@ static void print_subinterp(void)
     );
 }
 
-static void test_repeated_init_and_subinterpreters(void)
+static int test_repeated_init_and_subinterpreters(void)
 {
     PyThreadState *mainstate, *substate;
 #ifdef WITH_THREAD
@@ -70,6 +71,7 @@ static void test_repeated_init_and_subinterpreters(void)
         PyEval_RestoreThread(mainstate);
         Py_Finalize();
     }
+    return 0;
 }
 
 /*****************************************************
@@ -103,7 +105,7 @@ static void check_stdio_details(const char *encoding, const char * errors)
     Py_Finalize();
 }
 
-static void test_forced_io_encoding(void)
+static int test_forced_io_encoding(void)
 {
     /* Check various combinations */
     printf("--- Use defaults ---\n");
@@ -122,19 +124,123 @@ static void test_forced_io_encoding(void)
         printf("Unexpected success calling Py_SetStandardStreamEncoding");
     }
     Py_Finalize();
+    return 0;
 }
 
-/* Different embedding tests */
-int main(int argc, char *argv[])
+
+/*********************************************************
+ * Test parts of the C-API that work before initialization
+ *********************************************************/
+
+static int test_pre_initialization_api(void)
 {
+    /* Leading "./" ensures getpath.c can still find the standard library */
+    wchar_t *program = Py_DecodeLocale("./spam", NULL);
+    if (program == NULL) {
+        fprintf(stderr, "Fatal error: cannot decode program name\n");
+        return 1;
+    }
+    Py_SetProgramName(program);
 
-    /* TODO: Check the argument string to allow for more test cases */
-    if (argc > 1) {
-        /* For now: assume "forced_io_encoding */
-        test_forced_io_encoding();
-    } else {
-        /* Run the original embedding test case by default */
-        test_repeated_init_and_subinterpreters();
+    Py_Initialize();
+    Py_Finalize();
+
+    PyMem_RawFree(program);
+    return 0;
+}
+
+static void bpo20891_thread(void *lockp)
+{
+    PyThread_type_lock lock = *((PyThread_type_lock*)lockp);
+
+    PyGILState_STATE state = PyGILState_Ensure();
+    if (!PyGILState_Check()) {
+        fprintf(stderr, "PyGILState_Check failed!");
+        abort();
+    }
+
+    PyGILState_Release(state);
+
+    PyThread_release_lock(lock);
+
+    PyThread_exit_thread();
+}
+
+static int test_bpo20891(void)
+{
+    /* bpo-20891: Calling PyGILState_Ensure in a non-Python thread before
+       calling PyEval_InitThreads() must not crash. PyGILState_Ensure() must
+       call PyEval_InitThreads() for us in this case. */
+    PyThread_type_lock lock = PyThread_allocate_lock();
+    if (!lock) {
+        fprintf(stderr, "PyThread_allocate_lock failed!");
+        return 1;
+    }
+
+    _testembed_Py_Initialize();
+
+    long thrd = PyThread_start_new_thread(bpo20891_thread, &lock);
+    if (thrd == -1) {
+        fprintf(stderr, "PyThread_start_new_thread failed!");
+        return 1;
     }
+    PyThread_acquire_lock(lock, WAIT_LOCK);
+
+    Py_BEGIN_ALLOW_THREADS
+    /* wait until the thread exit */
+    PyThread_acquire_lock(lock, WAIT_LOCK);
+    Py_END_ALLOW_THREADS
+
+    PyThread_free_lock(lock);
+
     return 0;
 }
+
+
+/* *********************************************************
+ * List of test cases and the function that implements it.
+ *
+ * Names are compared case-sensitively with the first
+ * argument. If no match is found, or no first argument was
+ * provided, the names of all test cases are printed and
+ * the exit code will be -1.
+ *
+ * The int returned from test functions is used as the exit
+ * code, and test_capi treats all non-zero exit codes as a
+ * failed test.
+ *********************************************************/
+struct TestCase
+{
+    const char *name;
+    int (*func)(void);
+};
+
+static struct TestCase TestCases[] = {
+    { "forced_io_encoding", test_forced_io_encoding },
+    { "repeated_init_and_subinterpreters", test_repeated_init_and_subinterpreters },
+    { "pre_initialization_api", test_pre_initialization_api },
+    { "bpo20891", test_bpo20891 },
+    { NULL, NULL }
+};
+
+int main(int argc, char *argv[])
+{
+    if (argc > 1) {
+        for (struct TestCase *tc = TestCases; tc && tc->name; tc++) {
+            if (strcmp(argv[1], tc->name) == 0)
+                return (*tc->func)();
+        }
+    }
+
+    /* No match found, or no test name provided, so display usage */
+    printf("Python " PY_VERSION " _testembed executable for embedded interpreter tests\n"
+           "Normally executed via 'EmbeddingTests' in Lib/test/test_capi.py\n\n"
+           "Usage: %s TESTNAME\n\nAll available tests:\n", argv[0]);
+    for (struct TestCase *tc = TestCases; tc && tc->name; tc++) {
+        printf("  %s\n", tc->name);
+    }
+
+    /* Non-zero exit code will cause test_capi.py tests to fail.
+       This is intentional. */
+    return -1;
+}
diff --git a/Python/pystate.c b/Python/pystate.c
@@ -865,6 +865,8 @@ PyGILState_Ensure(void)
 {
     int current;
     PyThreadState *tcur;
+    int need_init_threads = 0;
+
     /* Note that we do not auto-init Python here - apart from
        potential races with 2 threads auto-initializing, pep-311
        spells out other issues.  Embedders are expected to have
@@ -873,10 +875,7 @@ PyGILState_Ensure(void)
     assert(autoInterpreterState); /* Py_Initialize() hasn't been called! */
     tcur = (PyThreadState *)PyThread_get_key_value(autoTLSkey);
     if (tcur == NULL) {
-        /* At startup, Python has no concrete GIL. If PyGILState_Ensure() is
-           called from a new thread for the first time, we need the create the
-           GIL. */
-        PyEval_InitThreads();
+        need_init_threads = 1;
 
         /* Create a new thread state for this thread */
         tcur = PyThreadState_New(autoInterpreterState);
@@ -887,16 +886,28 @@ PyGILState_Ensure(void)
         tcur->gilstate_counter = 0;
         current = 0; /* new thread state is never current */
     }
-    else
+    else {
         current = PyThreadState_IsCurrent(tcur);
-    if (current == 0)
+    }
+
+    if (current == 0) {
         PyEval_RestoreThread(tcur);
+    }
+
     /* Update our counter in the thread-state - no need for locks:
        - tcur will remain valid as we hold the GIL.
        - the counter is safe as we are the only thread "allowed"
          to modify this value
     */
     ++tcur->gilstate_counter;
+
+    if (need_init_threads) {
+        /* At startup, Python has no concrete GIL. If PyGILState_Ensure() is
+           called from a new thread for the first time, we need the create the
+           GIL. */
+        PyEval_InitThreads();
+    }
+
     return current ? PyGILState_LOCKED : PyGILState_UNLOCKED;
 }
 

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+Fix PyGILState_Ensure(). When PyGILState_Ensure() is called in a non-Python`
	`2`	`+thread before PyEval_InitThreads(), only call PyEval_InitThreads() after`
	`3`	`+calling PyThreadState_New() to fix a crash.`