diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index b135e30b7ad62a..37a747aa4e3e46 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -614,6 +614,47 @@ PyAPI_FUNC(int) _PyCode_SetUnboundVarCounts( PyObject *globalsns, PyObject *builtinsns); + +/* "Stateless" code is a function or code object which does not rely on + * external state or internal state. It may rely on arguments and + * builtins, but not globals or a closure. Thus it does not rely + * on __globals__ or __closure__, and a stateless function + * is equivalent to its code object. + * + * Stateless code also does not keep any persistent state + * of its own, so it can't have any executors, monitoring, + * instrumentation, or "extras" (i.e. co_extra). + * + * Stateless code may create nested functions, including closures. + * However, nested functions must themselves be stateless, except they + * *can* close on the enclosing locals. + * + * Stateless code may return any value, including nested functions and closures. + * + * Stateless code that takes no arguments and doesn't return anything + * may be treated like a script. + * + * We consider stateless code to be "portable" if it does not return + * any object that holds a reference to any of the code's locals. Thus + * generators and coroutines are not portable. Likewise a function + * that returns a closure is not portable. The concept of + * portability is useful in cases where the code is run + * in a different execution context than where + * the return value will be used. */ + +PyAPI_FUNC(int) _PyCode_CheckNoInternalState(PyCodeObject *, const char **); +PyAPI_FUNC(int) _PyCode_CheckNoExternalState( + PyCodeObject *, + _PyCode_var_counts_t *, + const char **); +PyAPI_FUNC(int) _PyCode_VerifyStateless( + PyThreadState *, + PyCodeObject *, + PyObject *globalnames, + PyObject *globalsns, + PyObject *builtinsns); + +PyAPI_FUNC(int) _PyCode_CheckPureFunction(PyCodeObject *, const char **); PyAPI_FUNC(int) _PyCode_ReturnsOnlyNone(PyCodeObject *); diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index 209252b2ddc0de..a30d52d49bdc4d 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -35,6 +35,13 @@ PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_cod extern PyObject *_Py_set_function_type_params( PyThreadState* unused, PyObject *func, PyObject *type_params); + +/* See pycore_code.h for explanation about what "stateless" means. */ + +PyAPI_FUNC(int) +_PyFunction_VerifyStateless(PyThreadState *, PyObject *); + + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_opcode_utils.h b/Include/internal/pycore_opcode_utils.h index 62af06dc01c125..79a1a242556a52 100644 --- a/Include/internal/pycore_opcode_utils.h +++ b/Include/internal/pycore_opcode_utils.h @@ -56,6 +56,8 @@ extern "C" { #define IS_RETURN_OPCODE(opcode) \ (opcode == RETURN_VALUE) +#define IS_RAISE_OPCODE(opcode) \ + (opcode == RAISE_VARARGS || opcode == RERAISE) /* Flags used in the oparg for MAKE_FUNCTION */ diff --git a/Lib/test/_code_definitions.py b/Lib/test/_code_definitions.py index c3daa0dccf5df3..d64ac45d85f396 100644 --- a/Lib/test/_code_definitions.py +++ b/Lib/test/_code_definitions.py @@ -178,6 +178,32 @@ def ham_C_closure(z): *NESTED_FUNCTIONS, ] +STATELESS_FUNCTIONS = [ + spam, + spam_minimal, + spam_with_builtins, + spam_args_attrs_and_builtins, + spam_returns_arg, + spam_annotated, + spam_with_inner_not_closure, + spam_with_inner_closure, + spam_N, + spam_C, + spam_NN, + spam_NC, + spam_CN, + spam_CC, + eggs_nested, + eggs_nested_N, + ham_nested, + ham_C_nested +] +STATELESS_CODE = [ + *STATELESS_FUNCTIONS, + spam_with_globals_and_builtins, + spam_full, +] + # generators diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index b646042a3b84b0..6715ee051336a1 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -220,6 +220,7 @@ import _testinternalcapi except ModuleNotFoundError: _testinternalcapi = None +import test._code_definitions as defs COPY_FREE_VARS = opmap['COPY_FREE_VARS'] @@ -671,7 +672,6 @@ def test_local_kinds(self): VARARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_POS VARKWARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_KW - import test._code_definitions as defs funcs = { defs.spam_minimal: {}, defs.spam_with_builtins: { @@ -897,7 +897,6 @@ def new_var_counts(*, }, } - import test._code_definitions as defs funcs = { defs.spam_minimal: new_var_counts(), defs.spam_with_builtins: new_var_counts( @@ -1025,42 +1024,35 @@ def new_var_counts(*, counts = _testinternalcapi.get_code_var_counts(func.__code__) self.assertEqual(counts, expected) - def func_with_globals_and_builtins(): - mod1 = _testinternalcapi - mod2 = dis - mods = (mod1, mod2) - checks = tuple(callable(m) for m in mods) - return callable(mod2), tuple(mods), list(mods), checks - - func = func_with_globals_and_builtins + func = defs.spam_with_globals_and_builtins with self.subTest(f'{func} code'): expected = new_var_counts( - purelocals=4, - globalvars=5, + purelocals=5, + globalvars=6, ) counts = _testinternalcapi.get_code_var_counts(func.__code__) self.assertEqual(counts, expected) with self.subTest(f'{func} with own globals and builtins'): expected = new_var_counts( - purelocals=4, - globalvars=(2, 3), + purelocals=5, + globalvars=(2, 4), ) counts = _testinternalcapi.get_code_var_counts(func) self.assertEqual(counts, expected) with self.subTest(f'{func} without globals'): expected = new_var_counts( - purelocals=4, - globalvars=(0, 3, 2), + purelocals=5, + globalvars=(0, 4, 2), ) counts = _testinternalcapi.get_code_var_counts(func, globalsns={}) self.assertEqual(counts, expected) with self.subTest(f'{func} without both'): expected = new_var_counts( - purelocals=4, - globalvars=5, + purelocals=5, + globalvars=6, ) counts = _testinternalcapi.get_code_var_counts(func, globalsns={}, builtinsns={}) @@ -1068,12 +1060,34 @@ def func_with_globals_and_builtins(): with self.subTest(f'{func} without builtins'): expected = new_var_counts( - purelocals=4, - globalvars=(2, 0, 3), + purelocals=5, + globalvars=(2, 0, 4), ) counts = _testinternalcapi.get_code_var_counts(func, builtinsns={}) self.assertEqual(counts, expected) + @unittest.skipIf(_testinternalcapi is None, "missing _testinternalcapi") + def test_stateless(self): + self.maxDiff = None + + for func in defs.STATELESS_CODE: + with self.subTest((func, '(code)')): + _testinternalcapi.verify_stateless_code(func.__code__) + for func in defs.STATELESS_FUNCTIONS: + with self.subTest((func, '(func)')): + _testinternalcapi.verify_stateless_code(func) + + for func in defs.FUNCTIONS: + if func not in defs.STATELESS_CODE: + with self.subTest((func, '(code)')): + with self.assertRaises(Exception): + _testinternalcapi.verify_stateless_code(func.__code__) + + if func not in defs.STATELESS_FUNCTIONS: + with self.subTest((func, '(func)')): + with self.assertRaises(Exception): + _testinternalcapi.verify_stateless_code(func) + def isinterned(s): return s is sys.intern(('_' + s + '_')[1:-1]) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index ae060c95fd5a01..63f1d079d8d312 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1165,6 +1165,47 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) return NULL; } +static PyObject * +verify_stateless_code(PyObject *self, PyObject *args, PyObject *kwargs) +{ + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *codearg; + PyObject *globalnames = NULL; + PyObject *globalsns = NULL; + PyObject *builtinsns = NULL; + static char *kwlist[] = {"code", "globalnames", + "globalsns", "builtinsns", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "O|O!O!O!:get_code_var_counts", kwlist, + &codearg, &PySet_Type, &globalnames, + &PyDict_Type, &globalsns, &PyDict_Type, &builtinsns)) + { + return NULL; + } + if (PyFunction_Check(codearg)) { + if (globalsns == NULL) { + globalsns = PyFunction_GET_GLOBALS(codearg); + } + if (builtinsns == NULL) { + builtinsns = PyFunction_GET_BUILTINS(codearg); + } + codearg = PyFunction_GET_CODE(codearg); + } + else if (!PyCode_Check(codearg)) { + PyErr_SetString(PyExc_TypeError, + "argument must be a code object or a function"); + return NULL; + } + PyCodeObject *code = (PyCodeObject *)codearg; + + if (_PyCode_VerifyStateless( + tstate, code, globalnames, globalsns, builtinsns) < 0) + { + return NULL; + } + Py_RETURN_NONE; +} + #ifdef _Py_TIER2 static PyObject * @@ -2292,6 +2333,8 @@ static PyMethodDef module_functions[] = { {"get_co_localskinds", get_co_localskinds, METH_O, NULL}, {"get_code_var_counts", _PyCFunction_CAST(get_code_var_counts), METH_VARARGS | METH_KEYWORDS, NULL}, + {"verify_stateless_code", _PyCFunction_CAST(verify_stateless_code), + METH_VARARGS | METH_KEYWORDS, NULL}, #ifdef _Py_TIER2 {"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL}, {"invalidate_executors", invalidate_executors, METH_O, NULL}, diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 92eaf5e00bc7dd..4f06a36a130207 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1955,12 +1955,130 @@ _PyCode_SetUnboundVarCounts(PyThreadState *tstate, } +int +_PyCode_CheckNoInternalState(PyCodeObject *co, const char **p_errmsg) +{ + const char *errmsg = NULL; + // We don't worry about co_executors, co_instrumentation, + // or co_monitoring. They are essentially ephemeral. + if (co->co_extra != NULL) { + errmsg = "only basic code objects are supported"; + } + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + +int +_PyCode_CheckNoExternalState(PyCodeObject *co, _PyCode_var_counts_t *counts, + const char **p_errmsg) +{ + const char *errmsg = NULL; + assert(counts->locals.hidden.total == 0); + if (counts->numfree > 0) { // It's a closure. + errmsg = "closures not supported"; + } + else if (counts->unbound.globals.numglobal > 0) { + errmsg = "globals not supported"; + } + else if (counts->unbound.globals.numbuiltin > 0 + && counts->unbound.globals.numunknown > 0) + { + errmsg = "globals not supported"; + } + // Otherwise we don't check counts.unbound.globals.numunknown since we can't + // distinguish beween globals and builtins here. + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + +int +_PyCode_VerifyStateless(PyThreadState *tstate, + PyCodeObject *co, PyObject *globalnames, + PyObject *globalsns, PyObject *builtinsns) +{ + const char *errmsg; + _PyCode_var_counts_t counts = {0}; + _PyCode_GetVarCounts(co, &counts); + if (_PyCode_SetUnboundVarCounts( + tstate, co, &counts, globalnames, NULL, + globalsns, builtinsns) < 0) + { + return -1; + } + // We may consider relaxing the internal state constraints + // if it becomes a problem. + if (!_PyCode_CheckNoInternalState(co, &errmsg)) { + _PyErr_SetString(tstate, PyExc_ValueError, errmsg); + return -1; + } + if (builtinsns != NULL) { + // Make sure the next check will fail for globals, + // even if there aren't any builtins. + counts.unbound.globals.numbuiltin += 1; + } + if (!_PyCode_CheckNoExternalState(co, &counts, &errmsg)) { + _PyErr_SetString(tstate, PyExc_ValueError, errmsg); + return -1; + } + // Note that we don't check co->co_flags & CO_NESTED for anything here. + return 0; +} + + +int +_PyCode_CheckPureFunction(PyCodeObject *co, const char **p_errmsg) +{ + const char *errmsg = NULL; + if (co->co_flags & CO_GENERATOR) { + errmsg = "generators not supported"; + } + else if (co->co_flags & CO_COROUTINE) { + errmsg = "coroutines not supported"; + } + else if (co->co_flags & CO_ITERABLE_COROUTINE) { + errmsg = "coroutines not supported"; + } + else if (co->co_flags & CO_ASYNC_GENERATOR) { + errmsg = "generators not supported"; + } + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + /* Here "value" means a non-None value, since a bare return is identical * to returning None explicitly. Likewise a missing return statement * at the end of the function is turned into "return None". */ static int code_returns_only_none(PyCodeObject *co) { + if (!_PyCode_CheckPureFunction(co, NULL)) { + return 0; + } + int len = (int)Py_SIZE(co); + assert(len > 0); + + // The last instruction either returns or raises. We can take advantage + // of that for a quick exit. + _Py_CODEUNIT final = _Py_GetBaseCodeUnit(co, len-1); + // Look up None in co_consts. Py_ssize_t nconsts = PyTuple_Size(co->co_consts); int none_index = 0; @@ -1971,26 +2089,42 @@ code_returns_only_none(PyCodeObject *co) } if (none_index == nconsts) { // None wasn't there, which means there was no implicit return, - // "return", or "return None". That means there must be - // an explicit return (non-None). - return 0; - } + // "return", or "return None". - // Walk the bytecode, looking for RETURN_VALUE. - Py_ssize_t len = Py_SIZE(co); - for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { - _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); - if (IS_RETURN_OPCODE(inst.op.code)) { - assert(i != 0); - // Ignore it if it returns None. - _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1); - if (prev.op.code == LOAD_CONST) { - // We don't worry about EXTENDED_ARG for now. - if (prev.op.arg == none_index) { - continue; + // That means there must be + // an explicit return (non-None), or it only raises. + if (IS_RETURN_OPCODE(final.op.code)) { + // It was an explicit return (non-None). + return 0; + } + // It must end with a raise then. We still have to walk the + // bytecode to see if there's any explicit return (non-None). + assert(IS_RAISE_OPCODE(final.op.code)); + for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (IS_RETURN_OPCODE(inst.op.code)) { + // We alraedy know it isn't returning None. + return 0; + } + } + // It must only raise. + } + else { + // Walk the bytecode, looking for RETURN_VALUE. + for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (IS_RETURN_OPCODE(inst.op.code)) { + assert(i != 0); + // Ignore it if it returns None. + _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1); + if (prev.op.code == LOAD_CONST) { + // We don't worry about EXTENDED_ARG for now. + if (prev.op.arg == none_index) { + continue; + } } + return 0; } - return 0; } } return 1; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 56df5730db0c55..27214a129c2fb8 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1,12 +1,14 @@ /* Function object implementation */ #include "Python.h" +#include "pycore_code.h" // _PyCode_VerifyStateless() #include "pycore_dict.h" // _Py_INCREF_DICT() #include "pycore_function.h" // _PyFunction_Vectorcall #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyerrors.h" // _PyErr_Occurred() +#include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_stats.h" @@ -1240,6 +1242,58 @@ PyTypeObject PyFunction_Type = { }; +int +_PyFunction_VerifyStateless(PyThreadState *tstate, PyObject *func) +{ + assert(!PyErr_Occurred()); + assert(PyFunction_Check(func)); + + // Check the globals. + PyObject *globalsns = PyFunction_GET_GLOBALS(func); + if (globalsns != NULL && !PyDict_Check(globalsns)) { + _PyErr_Format(tstate, PyExc_TypeError, + "unsupported globals %R", globalsns); + return -1; + } + // Check the builtins. + PyObject *builtinsns = PyFunction_GET_BUILTINS(func); + if (builtinsns != NULL && !PyDict_Check(builtinsns)) { + _PyErr_Format(tstate, PyExc_TypeError, + "unsupported builtins %R", builtinsns); + return -1; + } + // Disallow __defaults__. + PyObject *defaults = PyFunction_GET_DEFAULTS(func); + if (defaults != NULL && defaults != Py_None && PyDict_Size(defaults) > 0) + { + _PyErr_SetString(tstate, PyExc_ValueError, "defaults not supported"); + return -1; + } + // Disallow __kwdefaults__. + PyObject *kwdefaults = PyFunction_GET_KW_DEFAULTS(func); + if (kwdefaults != NULL && kwdefaults != Py_None + && PyDict_Size(kwdefaults) > 0) + { + _PyErr_SetString(tstate, PyExc_ValueError, + "keyword defaults not supported"); + return -1; + } + // Disallow __closure__. + PyObject *closure = PyFunction_GET_CLOSURE(func); + if (closure != NULL && closure != Py_None && PyTuple_GET_SIZE(closure) > 0) + { + _PyErr_SetString(tstate, PyExc_ValueError, "closures not supported"); + return -1; + } + // Check the code. + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + if (_PyCode_VerifyStateless(tstate, co, NULL, globalsns, builtinsns) < 0) { + return -1; + } + return 0; +} + + static int functools_copy_attr(PyObject *wrapper, PyObject *wrapped, PyObject *name) {