From 9f15c4b85931eb4086c40571e941e1348232eb3a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 9 Nov 2022 18:38:38 -0800 Subject: [PATCH 01/29] Support simple cache effects Had to refactor the parser a bit for this. --- Python/bytecodes.c | 12 ++-- Python/generated_cases.c.h | 8 +-- Tools/cases_generator/generate_cases.py | 42 ++++++++--- Tools/cases_generator/parser.py | 94 +++++++++++++++---------- 4 files changed, 94 insertions(+), 62 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d2df56ef7ba330..c6ca655e39c0fd 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -201,7 +201,7 @@ dummy_func( ERROR_IF(res == NULL, error); } - inst(BINARY_OP_MULTIPLY_INT, (left, right -- prod)) { + inst(BINARY_OP_MULTIPLY_INT, (left, right, unused/1 -- prod)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); @@ -210,10 +210,9 @@ dummy_func( _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); ERROR_IF(prod == NULL, error); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); } - inst(BINARY_OP_MULTIPLY_FLOAT, (left, right -- prod)) { + inst(BINARY_OP_MULTIPLY_FLOAT, (left, right, unused/1 -- prod)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP); @@ -224,10 +223,9 @@ dummy_func( _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); ERROR_IF(prod == NULL, error); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); } - inst(BINARY_OP_SUBTRACT_INT, (left, right -- sub)) { + inst(BINARY_OP_SUBTRACT_INT, (left, right, unused/1 -- sub)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); @@ -236,10 +234,9 @@ dummy_func( _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); ERROR_IF(sub == NULL, error); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); } - inst(BINARY_OP_SUBTRACT_FLOAT, (left, right -- sub)) { + inst(BINARY_OP_SUBTRACT_FLOAT, (left, right, unused/1 -- sub)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP); @@ -249,7 +246,6 @@ dummy_func( _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); ERROR_IF(sub == NULL, error); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); } inst(BINARY_OP_ADD_UNICODE, (left, right -- res)) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index da6b34038cdedf..77b4255e7f9018 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -143,9 +143,9 @@ _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); if (prod == NULL) goto pop_2_error; - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); STACK_SHRINK(1); POKE(1, prod); + next_instr += 1; DISPATCH(); } @@ -163,9 +163,9 @@ _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); if (prod == NULL) goto pop_2_error; - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); STACK_SHRINK(1); POKE(1, prod); + next_instr += 1; DISPATCH(); } @@ -181,9 +181,9 @@ _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); if (sub == NULL) goto pop_2_error; - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); STACK_SHRINK(1); POKE(1, sub); + next_instr += 1; DISPATCH(); } @@ -200,9 +200,9 @@ _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); if (sub == NULL) goto pop_2_error; - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); STACK_SHRINK(1); POKE(1, sub); + next_instr += 1; DISPATCH(); } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index b4f5f8f01dc181..11ad8efd2a391c 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -18,7 +18,6 @@ arg_parser = argparse.ArgumentParser() arg_parser.add_argument("-i", "--input", type=str, default="Python/bytecodes.c") arg_parser.add_argument("-o", "--output", type=str, default="Python/generated_cases.c.h") -arg_parser.add_argument("-c", "--compare", action="store_true") arg_parser.add_argument("-q", "--quiet", action="store_true") @@ -73,12 +72,30 @@ def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, d assert instr.block if dedent < 0: indent += " " * -dedent + # Separate stack inputs from cache inputs + input_names: set[str] = set() + stack: list[parser.StackEffect] = [] + cache: list[parser.CacheEffect] = [] + for input in instr.inputs: + if isinstance(input, parser.StackEffect): + stack.append(input) + input_names.add(input.name) + else: + assert isinstance(input, parser.CacheEffect), input + cache.append(input) + outputs = instr.outputs + cache_offset = 0 + for ceffect in cache: + if ceffect.name not in ("unused", "u", "_"): + bits = ceffect.size * 16 + f.write(f"{indent} PyObject *{ceffect.name} = read{bits}(next_instr + {cache_offset});\n") + cache_offset += ceffect.size # TODO: Is it better to count forward or backward? - for i, input in enumerate(reversed(instr.inputs), 1): - f.write(f"{indent} PyObject *{input} = PEEK({i});\n") + for i, effect in enumerate(reversed(stack), 1): + f.write(f"{indent} PyObject *{effect.name} = PEEK({i});\n") for output in instr.outputs: - if output not in instr.inputs: - f.write(f"{indent} PyObject *{output};\n") + if output.name not in input_names: + f.write(f"{indent} PyObject *{output.name};\n") assert instr.block is not None blocklines = instr.block.to_text(dedent=dedent).splitlines(True) # Remove blank lines from ends @@ -95,7 +112,7 @@ def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, d while blocklines and not blocklines[-1].strip(): blocklines.pop() # Write the body - ninputs = len(instr.inputs or ()) + ninputs = len(stack) for line in blocklines: if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line): space, cond, label = m.groups() @@ -107,16 +124,19 @@ def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, d f.write(f"{space}if ({cond}) goto {label};\n") else: f.write(line) - noutputs = len(instr.outputs or ()) + noutputs = len(outputs) diff = noutputs - ninputs if diff > 0: f.write(f"{indent} STACK_GROW({diff});\n") elif diff < 0: f.write(f"{indent} STACK_SHRINK({-diff});\n") - for i, output in enumerate(reversed(instr.outputs or ()), 1): - if output not in (instr.inputs or ()): - f.write(f"{indent} POKE({i}, {output});\n") - assert instr.block + for i, output in enumerate(reversed(outputs), 1): + if output.name not in (input_names): + f.write(f"{indent} POKE({i}, {output.name});\n") + # Cache effect + if cache_offset: + f.write(f"{indent} next_instr += {cache_offset};\n") + def write_cases(f: TextIO, instrs: list[InstDef], supers: list[parser.Super]): predictions: set[str] = set() diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index 9e95cdb42d42db..a358e84b44d387 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -56,11 +56,28 @@ class Block(Node): tokens: list[lx.Token] +@dataclass +class Effect(Node): + pass + + +@dataclass +class StackEffect(Effect): + name: str + # TODO: type, condition + + +@dataclass +class CacheEffect(Effect): + name: str + size: int + + @dataclass class InstHeader(Node): name: str - inputs: list[str] - outputs: list[str] + inputs: list[Effect] + outputs: list[Effect] @dataclass @@ -69,16 +86,17 @@ class InstDef(Node): block: Block @property - def name(self): + def name(self) -> str: return self.header.name @property - def inputs(self): + def inputs(self) -> list[Effect]: return self.header.inputs @property - def outputs(self): - return self.header.outputs + def outputs(self) -> list[StackEffect]: + # This is always true + return [x for x in self.header.outputs if isinstance(x, StackEffect)] @dataclass @@ -123,18 +141,16 @@ def inst_header(self) -> InstHeader | None: return InstHeader(name, [], []) return None - def check_overlaps(self, inp: list[str], outp: list[str]): + def check_overlaps(self, inp: list[Effect], outp: list[Effect]): for i, name in enumerate(inp): - try: - j = outp.index(name) - except ValueError: - continue - else: - if i != j: - raise self.make_syntax_error( - f"Input {name!r} at pos {i} repeated in output at different pos {j}") + for j, name2 in enumerate(outp): + if name == name2: + if i != j: + raise self.make_syntax_error( + f"Input {name!r} at pos {i} repeated in output at different pos {j}") + break - def stack_effect(self) -> tuple[list[str], list[str]]: + def stack_effect(self) -> tuple[list[Effect], list[Effect]]: # '(' [inputs] '--' [outputs] ')' if self.expect(lx.LPAREN): inp = self.inputs() or [] @@ -144,8 +160,8 @@ def stack_effect(self) -> tuple[list[str], list[str]]: return inp, outp raise self.make_syntax_error("Expected stack effect") - def inputs(self) -> list[str] | None: - # input (, input)* + def inputs(self) -> list[Effect] | None: + # input (',' input)* here = self.getpos() if inp := self.input(): near = self.getpos() @@ -157,27 +173,25 @@ def inputs(self) -> list[str] | None: self.setpos(here) return None - def input(self) -> str | None: - # IDENTIFIER + @contextual + def input(self) -> Effect | None: + # IDENTIFIER '/' INTEGER (CacheEffect) + # IDENTIFIER (StackEffect) if (tkn := self.expect(lx.IDENTIFIER)): - if self.expect(lx.LBRACKET): - if arg := self.expect(lx.IDENTIFIER): - if self.expect(lx.RBRACKET): - return f"{tkn.text}[{arg.text}]" - if self.expect(lx.TIMES): - if num := self.expect(lx.NUMBER): - if self.expect(lx.RBRACKET): - return f"{tkn.text}[{arg.text}*{num.text}]" - raise self.make_syntax_error("Expected argument in brackets", tkn) - - return tkn.text - if self.expect(lx.CONDOP): - while self.expect(lx.CONDOP): - pass - return "??" - return None + if self.expect(lx.DIVIDE): + if num := self.expect(lx.NUMBER): + try: + size = int(num.text) + except ValueError: + raise self.make_syntax_error( + f"Expected integer, got {num.text!r}") + else: + return CacheEffect(tkn.text, size) + raise self.make_syntax_error("Expected integer") + else: + return StackEffect(tkn.text) - def outputs(self) -> list[str] | None: + def outputs(self) -> list[Effect] | None: # output (, output)* here = self.getpos() if outp := self.output(): @@ -190,8 +204,10 @@ def outputs(self) -> list[str] | None: self.setpos(here) return None - def output(self) -> str | None: - return self.input() # TODO: They're not quite the same. + @contextual + def output(self) -> Effect | None: + if (tkn := self.expect(lx.IDENTIFIER)): + return StackEffect(tkn.text) @contextual def super_def(self) -> Super | None: From 6189043d9aca8b209a779544f27dc3d2167f2065 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 9 Nov 2022 22:15:48 -0800 Subject: [PATCH 02/29] More BINARY_OP instructions --- Python/bytecodes.c | 9 +++------ Python/generated_cases.c.h | 6 +++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c6ca655e39c0fd..881f49e00c85a8 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -248,7 +248,7 @@ dummy_func( ERROR_IF(sub == NULL, error); } - inst(BINARY_OP_ADD_UNICODE, (left, right -- res)) { + inst(BINARY_OP_ADD_UNICODE, (left, right, unused/1 -- res)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); @@ -257,7 +257,6 @@ dummy_func( _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); ERROR_IF(res == NULL, error); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); } // This is a subtle one. It's a super-instruction for @@ -296,7 +295,7 @@ dummy_func( JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP + 1); } - inst(BINARY_OP_ADD_FLOAT, (left, right -- sum)) { + inst(BINARY_OP_ADD_FLOAT, (left, right, unused/1 -- sum)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); @@ -307,10 +306,9 @@ dummy_func( _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); ERROR_IF(sum == NULL, error); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); } - inst(BINARY_OP_ADD_INT, (left, right -- sum)) { + inst(BINARY_OP_ADD_INT, (left, right, unused/1 -- sum)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); @@ -319,7 +317,6 @@ dummy_func( _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); ERROR_IF(sum == NULL, error); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); } inst(BINARY_SUBSCR, (container, sub -- res)) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 77b4255e7f9018..820b0d48bafdc7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -218,9 +218,9 @@ _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (res == NULL) goto pop_2_error; - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); STACK_SHRINK(1); POKE(1, res); + next_instr += 1; DISPATCH(); } @@ -272,9 +272,9 @@ _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); if (sum == NULL) goto pop_2_error; - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); STACK_SHRINK(1); POKE(1, sum); + next_instr += 1; DISPATCH(); } @@ -290,9 +290,9 @@ _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); if (sum == NULL) goto pop_2_error; - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); STACK_SHRINK(1); POKE(1, sum); + next_instr += 1; DISPATCH(); } From a8d608d66fd11ee3fd54b3b07a1d85b0e03df93e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Nov 2022 07:36:08 -0800 Subject: [PATCH 03/29] Tweak dummy definitions in bytecodes.c after merge --- Python/bytecodes.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 4506ca94bbe3d2..26962973099247 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -77,10 +77,6 @@ do { \ #define NAME_ERROR_MSG \ "name '%.200s' is not defined" -typedef struct { - PyObject *kwnames; -} CallShape; - // Dummy variables for stack effects. static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub; static PyObject *container, *start, *stop, *v; @@ -102,6 +98,8 @@ dummy_func( binaryfunc binary_ops[] ) { + _PyInterpreterFrame entry_frame; + switch (opcode) { /* BEWARE! From 4ee85e7b555691d1ae69906c67d6863c8a32464f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Nov 2022 16:27:32 +0100 Subject: [PATCH 04/29] gh-99300: Use Py_NewRef() in Objects/ directory (#99332) Replace Py_INCREF() and Py_XINCREF() with Py_NewRef() and Py_XNewRef() in C files of the Objects/ directory. --- Objects/abstract.c | 22 ++++++------------ Objects/boolobject.c | 3 +-- Objects/bytesobject.c | 51 ++++++++++++++--------------------------- Objects/call.c | 9 +++----- Objects/cellobject.c | 12 ++++------ Objects/classobject.c | 24 +++++++------------ Objects/codeobject.c | 51 ++++++++++++++--------------------------- Objects/complexobject.c | 15 ++++-------- Objects/descrobject.c | 46 ++++++++++++------------------------- 9 files changed, 77 insertions(+), 156 deletions(-) diff --git a/Objects/abstract.c b/Objects/abstract.c index 5d50491b2dd347..8aa3fc17c6341b 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -45,8 +45,7 @@ PyObject_Type(PyObject *o) } v = (PyObject *)Py_TYPE(o); - Py_INCREF(v); - return v; + return Py_NewRef(v); } Py_ssize_t @@ -722,9 +721,7 @@ PyBuffer_FillInfo(Py_buffer *view, PyObject *obj, void *buf, Py_ssize_t len, return -1; } - view->obj = obj; - if (obj) - Py_INCREF(obj); + view->obj = Py_XNewRef(obj); view->buf = buf; view->len = len; view->readonly = readonly; @@ -776,8 +773,7 @@ PyObject_Format(PyObject *obj, PyObject *format_spec) /* Fast path for common types. */ if (format_spec == NULL || PyUnicode_GET_LENGTH(format_spec) == 0) { if (PyUnicode_CheckExact(obj)) { - Py_INCREF(obj); - return obj; + return Py_NewRef(obj); } if (PyLong_CheckExact(obj)) { return PyObject_Str(obj); @@ -1405,8 +1401,7 @@ _PyNumber_Index(PyObject *item) } if (PyLong_Check(item)) { - Py_INCREF(item); - return item; + return Py_NewRef(item); } if (!_PyIndex_Check(item)) { PyErr_Format(PyExc_TypeError, @@ -1520,8 +1515,7 @@ PyNumber_Long(PyObject *o) } if (PyLong_CheckExact(o)) { - Py_INCREF(o); - return o; + return Py_NewRef(o); } m = Py_TYPE(o)->tp_as_number; if (m && m->nb_int) { /* This should include subclasses of int */ @@ -2045,8 +2039,7 @@ PySequence_Tuple(PyObject *v) a tuple *subclass* instance as-is, hence the restriction to exact tuples here. In contrast, lists always make a copy, so there's no need for exactness below. */ - Py_INCREF(v); - return v; + return Py_NewRef(v); } if (PyList_CheckExact(v)) return PyList_AsTuple(v); @@ -2144,8 +2137,7 @@ PySequence_Fast(PyObject *v, const char *m) } if (PyList_CheckExact(v) || PyTuple_CheckExact(v)) { - Py_INCREF(v); - return v; + return Py_NewRef(v); } it = PyObject_GetIter(v); diff --git a/Objects/boolobject.c b/Objects/boolobject.c index 8a20e368d4a42b..18666f88cbde10 100644 --- a/Objects/boolobject.c +++ b/Objects/boolobject.c @@ -23,8 +23,7 @@ PyObject *PyBool_FromLong(long ok) result = Py_True; else result = Py_False; - Py_INCREF(result); - return result; + return Py_NewRef(result); } /* We define bool_new to always return either Py_True or Py_False */ diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 80660881920fb7..91c89bbd9005a7 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -53,8 +53,7 @@ static inline PyObject* bytes_get_empty(void) // Return a strong reference to the empty bytes string singleton. static inline PyObject* bytes_new_empty(void) { - Py_INCREF(EMPTY); - return (PyObject *)EMPTY; + return Py_NewRef(EMPTY); } @@ -126,8 +125,7 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) } if (size == 1 && str != NULL) { op = CHARACTER(*str & 255); - Py_INCREF(op); - return (PyObject *)op; + return Py_NewRef(op); } if (size == 0) { return bytes_new_empty(); @@ -162,8 +160,7 @@ PyBytes_FromString(const char *str) } else if (size == 1) { op = CHARACTER(*str & 255); - Py_INCREF(op); - return (PyObject *)op; + return Py_NewRef(op); } /* Inline PyObject_NewVar */ @@ -527,14 +524,12 @@ format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen) if (PyBytes_Check(v)) { *pbuf = PyBytes_AS_STRING(v); *plen = PyBytes_GET_SIZE(v); - Py_INCREF(v); - return v; + return Py_NewRef(v); } if (PyByteArray_Check(v)) { *pbuf = PyByteArray_AS_STRING(v); *plen = PyByteArray_GET_SIZE(v); - Py_INCREF(v); - return v; + return Py_NewRef(v); } /* does it support __bytes__? */ func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__)); @@ -1411,13 +1406,11 @@ bytes_concat(PyObject *a, PyObject *b) /* Optimize end cases */ if (va.len == 0 && PyBytes_CheckExact(b)) { - result = b; - Py_INCREF(result); + result = Py_NewRef(b); goto done; } if (vb.len == 0 && PyBytes_CheckExact(a)) { - result = a; - Py_INCREF(result); + result = Py_NewRef(a); goto done; } @@ -1458,8 +1451,7 @@ bytes_repeat(PyBytesObject *a, Py_ssize_t n) } size = Py_SIZE(a) * n; if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) { - Py_INCREF(a); - return (PyObject *)a; + return Py_NewRef(a); } nbytes = (size_t)size; if (nbytes + PyBytesObject_SIZE <= nbytes) { @@ -1625,8 +1617,7 @@ bytes_subscript(PyBytesObject* self, PyObject* item) else if (start == 0 && step == 1 && slicelength == PyBytes_GET_SIZE(self) && PyBytes_CheckExact(self)) { - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } else if (step == 1) { return PyBytes_FromStringAndSize( @@ -1696,8 +1687,7 @@ bytes___bytes___impl(PyBytesObject *self) /*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/ { if (PyBytes_CheckExact(self)) { - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } else { return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self)); @@ -1922,8 +1912,7 @@ do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj) PyBuffer_Release(&vsep); if (i == 0 && j == len && PyBytes_CheckExact(self)) { - Py_INCREF(self); - return (PyObject*)self; + return Py_NewRef(self); } else return PyBytes_FromStringAndSize(s+i, j-i); @@ -1952,8 +1941,7 @@ do_strip(PyBytesObject *self, int striptype) } if (i == 0 && j == len && PyBytes_CheckExact(self)) { - Py_INCREF(self); - return (PyObject*)self; + return Py_NewRef(self); } else return PyBytes_FromStringAndSize(s+i, j-i); @@ -2152,8 +2140,7 @@ bytes_translate_impl(PyBytesObject *self, PyObject *table, } if (!changed && PyBytes_CheckExact(input_obj)) { Py_DECREF(result); - Py_INCREF(input_obj); - return input_obj; + return Py_NewRef(input_obj); } /* Fix the size of the resulting byte string */ if (inlen > 0) @@ -2245,8 +2232,7 @@ bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix) } if (PyBytes_CheckExact(self)) { - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } return PyBytes_FromStringAndSize(self_start, self_len); @@ -2284,8 +2270,7 @@ bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix) } if (PyBytes_CheckExact(self)) { - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } return PyBytes_FromStringAndSize(self_start, self_len); @@ -2844,8 +2829,7 @@ PyBytes_FromObject(PyObject *x) } if (PyBytes_CheckExact(x)) { - Py_INCREF(x); - return x; + return Py_NewRef(x); } /* Use the modern buffer interface */ @@ -3269,8 +3253,7 @@ bytes_iter(PyObject *seq) if (it == NULL) return NULL; it->it_index = 0; - Py_INCREF(seq); - it->it_seq = (PyBytesObject *)seq; + it->it_seq = (PyBytesObject *)Py_NewRef(seq); _PyObject_GC_TRACK(it); return (PyObject *)it; } diff --git a/Objects/call.c b/Objects/call.c index c2509db2a9a263..c4d31d0a27ece6 100644 --- a/Objects/call.c +++ b/Objects/call.c @@ -1000,8 +1000,7 @@ _PyStack_UnpackDict(PyThreadState *tstate, /* Copy positional arguments */ for (Py_ssize_t i = 0; i < nargs; i++) { - Py_INCREF(args[i]); - stack[i] = args[i]; + stack[i] = Py_NewRef(args[i]); } PyObject **kwstack = stack + nargs; @@ -1013,10 +1012,8 @@ _PyStack_UnpackDict(PyThreadState *tstate, unsigned long keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS; while (PyDict_Next(kwargs, &pos, &key, &value)) { keys_are_strings &= Py_TYPE(key)->tp_flags; - Py_INCREF(key); - Py_INCREF(value); - PyTuple_SET_ITEM(kwnames, i, key); - kwstack[i] = value; + PyTuple_SET_ITEM(kwnames, i, Py_NewRef(key)); + kwstack[i] = Py_NewRef(value); i++; } diff --git a/Objects/cellobject.c b/Objects/cellobject.c index 1ddf4c5d8b8bdc..f516707f6f8086 100644 --- a/Objects/cellobject.c +++ b/Objects/cellobject.c @@ -11,8 +11,7 @@ PyCell_New(PyObject *obj) op = (PyCellObject *)PyObject_GC_New(PyCellObject, &PyCell_Type); if (op == NULL) return NULL; - op->ob_ref = obj; - Py_XINCREF(obj); + op->ob_ref = Py_XNewRef(obj); _PyObject_GC_TRACK(op); return (PyObject *)op; @@ -68,8 +67,7 @@ PyCell_Set(PyObject *op, PyObject *value) return -1; } PyObject *old_value = PyCell_GET(op); - Py_XINCREF(value); - PyCell_SET(op, value); + PyCell_SET(op, Py_XNewRef(value)); Py_XDECREF(old_value); return 0; } @@ -135,15 +133,13 @@ cell_get_contents(PyCellObject *op, void *closure) PyErr_SetString(PyExc_ValueError, "Cell is empty"); return NULL; } - Py_INCREF(op->ob_ref); - return op->ob_ref; + return Py_NewRef(op->ob_ref); } static int cell_set_contents(PyCellObject *op, PyObject *obj, void *Py_UNUSED(ignored)) { - Py_XINCREF(obj); - Py_XSETREF(op->ob_ref, obj); + Py_XSETREF(op->ob_ref, Py_XNewRef(obj)); return 0; } diff --git a/Objects/classobject.c b/Objects/classobject.c index b9708ba0e41a3b..eedf8f0e1e1acf 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -113,10 +113,8 @@ PyMethod_New(PyObject *func, PyObject *self) return NULL; } im->im_weakreflist = NULL; - Py_INCREF(func); - im->im_func = func; - Py_INCREF(self); - im->im_self = self; + im->im_func = Py_NewRef(func); + im->im_self = Py_NewRef(self); im->vectorcall = method_vectorcall; _PyObject_GC_TRACK(im); return (PyObject *)im; @@ -195,8 +193,7 @@ method_getattro(PyObject *obj, PyObject *name) if (f != NULL) return f(descr, obj, (PyObject *)Py_TYPE(obj)); else { - Py_INCREF(descr); - return descr; + return Py_NewRef(descr); } } @@ -267,8 +264,7 @@ method_richcompare(PyObject *self, PyObject *other, int op) res = eq ? Py_True : Py_False; else res = eq ? Py_False : Py_True; - Py_INCREF(res); - return res; + return Py_NewRef(res); } static PyObject * @@ -359,8 +355,7 @@ PyInstanceMethod_New(PyObject *func) { method = PyObject_GC_New(PyInstanceMethodObject, &PyInstanceMethod_Type); if (method == NULL) return NULL; - Py_INCREF(func); - method->func = func; + method->func = Py_NewRef(func); _PyObject_GC_TRACK(method); return (PyObject *)method; } @@ -412,8 +407,7 @@ instancemethod_getattro(PyObject *self, PyObject *name) if (f != NULL) return f(descr, self, (PyObject *)Py_TYPE(self)); else { - Py_INCREF(descr); - return descr; + return Py_NewRef(descr); } } @@ -443,8 +437,7 @@ static PyObject * instancemethod_descr_get(PyObject *descr, PyObject *obj, PyObject *type) { PyObject *func = PyInstanceMethod_GET_FUNCTION(descr); if (obj == NULL) { - Py_INCREF(func); - return func; + return Py_NewRef(func); } else return PyMethod_New(func, obj); @@ -472,8 +465,7 @@ instancemethod_richcompare(PyObject *self, PyObject *other, int op) res = eq ? Py_True : Py_False; else res = eq ? Py_False : Py_True; - Py_INCREF(res); - return res; + return Py_NewRef(res); } static PyObject * diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 3824fc0f5a4e10..fc1db72977aa01 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -175,8 +175,7 @@ void _Py_set_localsplus_info(int offset, PyObject *name, _PyLocals_Kind kind, PyObject *names, PyObject *kinds) { - Py_INCREF(name); - PyTuple_SET_ITEM(names, offset, name); + PyTuple_SET_ITEM(names, offset, Py_NewRef(name)); _PyLocals_SetKind(kinds, offset, kind); } @@ -235,8 +234,7 @@ get_localsplus_names(PyCodeObject *co, _PyLocals_Kind kind, int num) } assert(index < num); PyObject *name = PyTuple_GET_ITEM(co->co_localsplusnames, offset); - Py_INCREF(name); - PyTuple_SET_ITEM(names, index, name); + PyTuple_SET_ITEM(names, index, Py_NewRef(name)); index += 1; } assert(index == num); @@ -311,27 +309,19 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) get_localsplus_counts(con->localsplusnames, con->localspluskinds, &nlocals, &nplaincellvars, &ncellvars, &nfreevars); - Py_INCREF(con->filename); - co->co_filename = con->filename; - Py_INCREF(con->name); - co->co_name = con->name; - Py_INCREF(con->qualname); - co->co_qualname = con->qualname; + co->co_filename = Py_NewRef(con->filename); + co->co_name = Py_NewRef(con->name); + co->co_qualname = Py_NewRef(con->qualname); co->co_flags = con->flags; co->co_firstlineno = con->firstlineno; - Py_INCREF(con->linetable); - co->co_linetable = con->linetable; + co->co_linetable = Py_NewRef(con->linetable); - Py_INCREF(con->consts); - co->co_consts = con->consts; - Py_INCREF(con->names); - co->co_names = con->names; + co->co_consts = Py_NewRef(con->consts); + co->co_names = Py_NewRef(con->names); - Py_INCREF(con->localsplusnames); - co->co_localsplusnames = con->localsplusnames; - Py_INCREF(con->localspluskinds); - co->co_localspluskinds = con->localspluskinds; + co->co_localsplusnames = Py_NewRef(con->localsplusnames); + co->co_localspluskinds = Py_NewRef(con->localspluskinds); co->co_argcount = con->argcount; co->co_posonlyargcount = con->posonlyargcount; @@ -339,8 +329,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) co->co_stacksize = con->stacksize; - Py_INCREF(con->exceptiontable); - co->co_exceptiontable = con->exceptiontable; + co->co_exceptiontable = Py_NewRef(con->exceptiontable); /* derived values */ co->co_nlocalsplus = nlocalsplus; @@ -1144,8 +1133,7 @@ lineiter_next(lineiterator *li) start = PyLong_FromLong(bounds->ar_start); end = PyLong_FromLong(bounds->ar_end); if (bounds->ar_line < 0) { - Py_INCREF(Py_None); - line = Py_None; + line = Py_NewRef(Py_None); } else { line = PyLong_FromLong(bounds->ar_line); @@ -1215,8 +1203,7 @@ new_linesiterator(PyCodeObject *code) if (li == NULL) { return NULL; } - Py_INCREF(code); - li->li_code = code; + li->li_code = (PyCodeObject*)Py_NewRef(code); _PyCode_InitAddressRange(code, &li->li_line); return li; } @@ -1315,8 +1302,7 @@ code_positionsiterator(PyCodeObject* code, PyObject* Py_UNUSED(args)) if (pi == NULL) { return NULL; } - Py_INCREF(code); - pi->pi_code = code; + pi->pi_code = (PyCodeObject*)Py_NewRef(code); _PyCode_InitAddressRange(code, &pi->pi_range); pi->pi_offset = pi->pi_range.ar_end; return (PyObject*)pi; @@ -1777,8 +1763,7 @@ code_richcompare(PyObject *self, PyObject *other, int op) res = Py_False; done: - Py_INCREF(res); - return res; + return Py_NewRef(res); } static Py_hash_t @@ -2030,8 +2015,7 @@ code__varname_from_oparg_impl(PyCodeObject *self, int oparg) if (name == NULL) { return NULL; } - Py_INCREF(name); - return name; + return Py_NewRef(name); } /* XXX code objects need to participate in GC? */ @@ -2106,8 +2090,7 @@ _PyCode_ConstantKey(PyObject *op) { /* Objects of these types are always different from object of other * type and from tuples. */ - Py_INCREF(op); - key = op; + key = Py_NewRef(op); } else if (PyBool_Check(op) || PyBytes_CheckExact(op)) { /* Make booleans different from integers 0 and 1. diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 9bd68d50c30ae0..aee03ddfb07aec 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -449,8 +449,7 @@ to_complex(PyObject **pobj, Py_complex *pc) pc->real = PyFloat_AsDouble(obj); return 0; } - Py_INCREF(Py_NotImplemented); - *pobj = Py_NotImplemented; + *pobj = Py_NewRef(Py_NotImplemented); return -1; } @@ -553,8 +552,7 @@ static PyObject * complex_pos(PyComplexObject *v) { if (PyComplex_CheckExact(v)) { - Py_INCREF(v); - return (PyObject *)v; + return Py_NewRef(v); } else return PyComplex_FromCComplex(v->cval); @@ -631,8 +629,7 @@ complex_richcompare(PyObject *v, PyObject *w, int op) else res = Py_False; - Py_INCREF(res); - return res; + return Py_NewRef(res); Unimplemented: Py_RETURN_NOTIMPLEMENTED; @@ -705,8 +702,7 @@ complex___complex___impl(PyComplexObject *self) /*[clinic end generated code: output=e6b35ba3d275dc9c input=3589ada9d27db854]*/ { if (PyComplex_CheckExact(self)) { - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } else { return PyComplex_FromCComplex(self->cval); @@ -917,8 +913,7 @@ complex_new_impl(PyTypeObject *type, PyObject *r, PyObject *i) to exact complexes here. If either the input or the output is a complex subclass, it will be handled below as a non-orthogonal vector. */ - Py_INCREF(r); - return r; + return Py_NewRef(r); } if (PyUnicode_Check(r)) { if (i != NULL) { diff --git a/Objects/descrobject.c b/Objects/descrobject.c index a2974f91aaaec3..550bfb36df0f66 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -622,8 +622,7 @@ descr_get_qualname(PyDescrObject *descr, void *Py_UNUSED(ignored)) { if (descr->d_qualname == NULL) descr->d_qualname = calculate_qualname(descr); - Py_XINCREF(descr->d_qualname); - return descr->d_qualname; + return Py_XNewRef(descr->d_qualname); } static PyObject * @@ -904,8 +903,7 @@ descr_new(PyTypeObject *descrtype, PyTypeObject *type, const char *name) descr = (PyDescrObject *)PyType_GenericAlloc(descrtype, 0); if (descr != NULL) { - Py_XINCREF(type); - descr->d_type = type; + descr->d_type = (PyTypeObject*)Py_XNewRef(type); descr->d_name = PyUnicode_InternFromString(name); if (descr->d_name == NULL) { Py_DECREF(descr); @@ -1244,8 +1242,7 @@ mappingproxy_new_impl(PyTypeObject *type, PyObject *mapping) mappingproxy = PyObject_GC_New(mappingproxyobject, &PyDictProxy_Type); if (mappingproxy == NULL) return NULL; - Py_INCREF(mapping); - mappingproxy->mapping = mapping; + mappingproxy->mapping = Py_NewRef(mapping); _PyObject_GC_TRACK(mappingproxy); return (PyObject *)mappingproxy; } @@ -1260,8 +1257,7 @@ PyDictProxy_New(PyObject *mapping) pp = PyObject_GC_New(mappingproxyobject, &PyDictProxy_Type); if (pp != NULL) { - Py_INCREF(mapping); - pp->mapping = mapping; + pp->mapping = Py_NewRef(mapping); _PyObject_GC_TRACK(pp); } return (PyObject *)pp; @@ -1361,8 +1357,7 @@ wrapper_objclass(wrapperobject *wp, void *Py_UNUSED(ignored)) { PyObject *c = (PyObject *)PyDescr_TYPE(wp->descr); - Py_INCREF(c); - return c; + return Py_NewRef(c); } static PyObject * @@ -1466,10 +1461,8 @@ PyWrapper_New(PyObject *d, PyObject *self) wp = PyObject_GC_New(wrapperobject, &_PyMethodWrapper_Type); if (wp != NULL) { - Py_INCREF(descr); - wp->descr = descr; - Py_INCREF(self); - wp->self = self; + wp->descr = (PyWrapperDescrObject*)Py_NewRef(descr); + wp->self = Py_NewRef(self); _PyObject_GC_TRACK(wp); } return (PyObject *)wp; @@ -1566,8 +1559,7 @@ property_set_name(PyObject *self, PyObject *args) { propertyobject *prop = (propertyobject *)self; PyObject *name = PyTuple_GET_ITEM(args, 1); - Py_XINCREF(name); - Py_XSETREF(prop->prop_name, name); + Py_XSETREF(prop->prop_name, Py_XNewRef(name)); Py_RETURN_NONE; } @@ -1599,8 +1591,7 @@ static PyObject * property_descr_get(PyObject *self, PyObject *obj, PyObject *type) { if (obj == NULL || obj == Py_None) { - Py_INCREF(self); - return self; + return Py_NewRef(self); } propertyobject *gs = (propertyobject *)self; @@ -1722,8 +1713,7 @@ property_copy(PyObject *old, PyObject *get, PyObject *set, PyObject *del) if (new == NULL) return NULL; - Py_XINCREF(pold->prop_name); - Py_XSETREF(((propertyobject *) new)->prop_name, pold->prop_name); + Py_XSETREF(((propertyobject *) new)->prop_name, Py_XNewRef(pold->prop_name)); return new; } @@ -1776,13 +1766,9 @@ property_init_impl(propertyobject *self, PyObject *fget, PyObject *fset, if (fdel == Py_None) fdel = NULL; - Py_XINCREF(fget); - Py_XINCREF(fset); - Py_XINCREF(fdel); - - Py_XSETREF(self->prop_get, fget); - Py_XSETREF(self->prop_set, fset); - Py_XSETREF(self->prop_del, fdel); + Py_XSETREF(self->prop_get, Py_XNewRef(fget)); + Py_XSETREF(self->prop_set, Py_XNewRef(fset)); + Py_XSETREF(self->prop_del, Py_XNewRef(fdel)); Py_XSETREF(self->prop_doc, NULL); Py_XSETREF(self->prop_name, NULL); @@ -1790,8 +1776,7 @@ property_init_impl(propertyobject *self, PyObject *fget, PyObject *fset, PyObject *prop_doc = NULL; if (doc != NULL && doc != Py_None) { - prop_doc = doc; - Py_XINCREF(prop_doc); + prop_doc = Py_XNewRef(doc); } /* if no docstring given and the getter has one, use that one */ else if (fget != NULL) { @@ -1820,8 +1805,7 @@ property_init_impl(propertyobject *self, PyObject *fget, PyObject *fset, class's dict. */ if (prop_doc == NULL) { - prop_doc = Py_None; - Py_INCREF(prop_doc); + prop_doc = Py_NewRef(Py_None); } int err = PyObject_SetAttr( (PyObject *)self, &_Py_ID(__doc__), prop_doc); From 873da3158804d77d5d487af0d5d61b4f5d45f5e1 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Nov 2022 16:27:53 +0100 Subject: [PATCH 05/29] gh-99300: Use Py_NewRef() in Objects/dictobject.c (#99333) Replace Py_INCREF() and Py_XINCREF() with Py_NewRef() and Py_XNewRef() in Objects/dictobject.c. --- Objects/dictobject.c | 140 +++++++++++++++---------------------------- 1 file changed, 48 insertions(+), 92 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 97007479b1be91..fc487203869c0e 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1200,7 +1200,6 @@ insert_into_dictkeys(PyDictKeysObject *keys, PyObject *name) if (keys->dk_usable <= 0) { return DKIX_EMPTY; } - Py_INCREF(name); /* Insert into new slot. */ keys->dk_version = 0; Py_ssize_t hashpos = find_empty_slot(keys, hash); @@ -1208,7 +1207,7 @@ insert_into_dictkeys(PyDictKeysObject *keys, PyObject *name) PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(keys)[ix]; dictkeys_set_index(keys, hashpos, ix); assert(ep->me_key == NULL); - ep->me_key = name; + ep->me_key = Py_NewRef(name); keys->dk_usable--; keys->dk_nentries++; } @@ -1453,8 +1452,7 @@ dictresize(PyDictObject *mp, uint8_t log2_newsize, int unicode) int index = get_index_from_order(mp, i); PyDictUnicodeEntry *ep = &oldentries[index]; assert(oldvalues->values[index] != NULL); - Py_INCREF(ep->me_key); - newentries[i].me_key = ep->me_key; + newentries[i].me_key = Py_NewRef(ep->me_key); newentries[i].me_hash = unicode_get_hash(ep->me_key); newentries[i].me_value = oldvalues->values[index]; } @@ -1467,8 +1465,7 @@ dictresize(PyDictObject *mp, uint8_t log2_newsize, int unicode) int index = get_index_from_order(mp, i); PyDictUnicodeEntry *ep = &oldentries[index]; assert(oldvalues->values[index] != NULL); - Py_INCREF(ep->me_key); - newentries[i].me_key = ep->me_key; + newentries[i].me_key = Py_NewRef(ep->me_key); newentries[i].me_value = oldvalues->values[index]; } build_indices_unicode(mp->ma_keys, newentries, numentries); @@ -1867,9 +1864,8 @@ PyDict_SetItem(PyObject *op, PyObject *key, PyObject *value) } assert(key); assert(value); - Py_INCREF(key); - Py_INCREF(value); - return _PyDict_SetItem_Take2((PyDictObject *)op, key, value); + return _PyDict_SetItem_Take2((PyDictObject *)op, + Py_NewRef(key), Py_NewRef(value)); } int @@ -2185,8 +2181,7 @@ _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *d if (mp->ma_used == 0) { if (deflt) { - Py_INCREF(deflt); - return deflt; + return Py_NewRef(deflt); } _PyErr_SetKeyError(key); return NULL; @@ -2196,8 +2191,7 @@ _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *d return NULL; if (ix == DKIX_EMPTY || old_value == NULL) { if (deflt) { - Py_INCREF(deflt); - return deflt; + return Py_NewRef(deflt); } _PyErr_SetKeyError(key); return NULL; @@ -2218,8 +2212,7 @@ _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt) if (((PyDictObject *)dict)->ma_used == 0) { if (deflt) { - Py_INCREF(deflt); - return deflt; + return Py_NewRef(deflt); } _PyErr_SetKeyError(key); return NULL; @@ -2260,9 +2253,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value) } while (_PyDict_Next(iterable, &pos, &key, &oldvalue, &hash)) { - Py_INCREF(key); - Py_INCREF(value); - if (insertdict(mp, key, hash, value)) { + if (insertdict(mp, Py_NewRef(key), hash, Py_NewRef(value))) { Py_DECREF(d); return NULL; } @@ -2281,9 +2272,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value) } while (_PySet_NextEntry(iterable, &pos, &key, &hash)) { - Py_INCREF(key); - Py_INCREF(value); - if (insertdict(mp, key, hash, value)) { + if (insertdict(mp, Py_NewRef(key), hash, Py_NewRef(value))) { Py_DECREF(d); return NULL; } @@ -2489,8 +2478,7 @@ dict_subscript(PyDictObject *mp, PyObject *key) _PyErr_SetKeyError(key); return NULL; } - Py_INCREF(value); - return value; + return Py_NewRef(value); } static int @@ -2532,8 +2520,7 @@ dict_keys(PyDictObject *mp) PyObject *key; while (_PyDict_Next((PyObject*)mp, &pos, &key, NULL, NULL)) { assert(j < n); - Py_INCREF(key); - PyList_SET_ITEM(v, j, key); + PyList_SET_ITEM(v, j, Py_NewRef(key)); j++; } assert(j == n); @@ -2564,8 +2551,7 @@ dict_values(PyDictObject *mp) PyObject *value; while (_PyDict_Next((PyObject*)mp, &pos, NULL, &value, NULL)) { assert(j < n); - Py_INCREF(value); - PyList_SET_ITEM(v, j, value); + PyList_SET_ITEM(v, j, Py_NewRef(value)); j++; } assert(j == n); @@ -2610,10 +2596,8 @@ dict_items(PyDictObject *mp) while (_PyDict_Next((PyObject*)mp, &pos, &key, &value, NULL)) { assert(j < n); PyObject *item = PyList_GET_ITEM(v, j); - Py_INCREF(key); - PyTuple_SET_ITEM(item, 0, key); - Py_INCREF(value); - PyTuple_SET_ITEM(item, 1, value); + PyTuple_SET_ITEM(item, 0, Py_NewRef(key)); + PyTuple_SET_ITEM(item, 1, Py_NewRef(value)); j++; } assert(j == n); @@ -2865,16 +2849,12 @@ dict_merge(PyObject *a, PyObject *b, int override) Py_INCREF(key); Py_INCREF(value); if (override == 1) { - Py_INCREF(key); - Py_INCREF(value); - err = insertdict(mp, key, hash, value); + err = insertdict(mp, Py_NewRef(key), hash, Py_NewRef(value)); } else { err = _PyDict_Contains_KnownHash(a, key, hash); if (err == 0) { - Py_INCREF(key); - Py_INCREF(value); - err = insertdict(mp, key, hash, value); + err = insertdict(mp, Py_NewRef(key), hash, Py_NewRef(value)); } else if (err > 0) { if (override != 0) { @@ -3021,8 +3001,7 @@ PyDict_Copy(PyObject *o) dictkeys_incref(mp->ma_keys); for (i = 0, n = size; i < n; i++) { PyObject *value = mp->ma_values->values[i]; - Py_XINCREF(value); - split_copy->ma_values->values[i] = value; + split_copy->ma_values->values[i] = Py_XNewRef(value); } if (_PyObject_GC_IS_TRACKED(mp)) _PyObject_GC_TRACK(split_copy); @@ -3197,8 +3176,7 @@ dict_richcompare(PyObject *v, PyObject *w, int op) } else res = Py_NotImplemented; - Py_INCREF(res); - return res; + return Py_NewRef(res); } /*[clinic input] @@ -3263,8 +3241,7 @@ dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value) if (ix == DKIX_EMPTY || val == NULL) { val = default_value; } - Py_INCREF(val); - return val; + return Py_NewRef(val); } PyObject * @@ -3286,9 +3263,8 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) } if (mp->ma_keys == Py_EMPTY_KEYS) { - Py_INCREF(key); - Py_INCREF(defaultobj); - if (insert_to_emptydict(mp, key, hash, defaultobj) < 0) { + if (insert_to_emptydict(mp, Py_NewRef(key), hash, + Py_NewRef(defaultobj)) < 0) { return NULL; } return defaultobj; @@ -3318,26 +3294,24 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) if (DK_IS_UNICODE(mp->ma_keys)) { assert(PyUnicode_CheckExact(key)); PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries]; - ep->me_key = key; + ep->me_key = Py_NewRef(key); if (_PyDict_HasSplitTable(mp)) { Py_ssize_t index = (int)mp->ma_keys->dk_nentries; assert(index < SHARED_KEYS_MAX_SIZE); assert(mp->ma_values->values[index] == NULL); - mp->ma_values->values[index] = value; + mp->ma_values->values[index] = Py_NewRef(value); _PyDictValues_AddToInsertionOrder(mp->ma_values, index); } else { - ep->me_value = value; + ep->me_value = Py_NewRef(value); } } else { PyDictKeyEntry *ep = &DK_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries]; - ep->me_key = key; + ep->me_key = Py_NewRef(key); ep->me_hash = hash; - ep->me_value = value; + ep->me_value = Py_NewRef(value); } - Py_INCREF(key); - Py_INCREF(value); MAINTAIN_TRACKING(mp, key, value); mp->ma_used++; mp->ma_version_tag = new_version; @@ -3350,9 +3324,8 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) value = defaultobj; assert(_PyDict_HasSplitTable(mp)); assert(mp->ma_values->values[ix] == NULL); - Py_INCREF(value); MAINTAIN_TRACKING(mp, key, value); - mp->ma_values->values[ix] = value; + mp->ma_values->values[ix] = Py_NewRef(value); _PyDictValues_AddToInsertionOrder(mp->ma_values, ix); mp->ma_used++; mp->ma_version_tag = new_version; @@ -3382,8 +3355,7 @@ dict_setdefault_impl(PyDictObject *self, PyObject *key, PyObject *val; val = PyDict_SetDefault((PyObject *)self, key, default_value); - Py_XINCREF(val); - return val; + return Py_XNewRef(val); } static PyObject * @@ -3603,8 +3575,7 @@ dict_ior(PyObject *self, PyObject *other) if (dict_update_arg(self, other)) { return NULL; } - Py_INCREF(self); - return self; + return Py_NewRef(self); } PyDoc_STRVAR(getitem__doc__, @@ -3942,8 +3913,7 @@ dictiter_new(PyDictObject *dict, PyTypeObject *itertype) if (di == NULL) { return NULL; } - Py_INCREF(dict); - di->di_dict = dict; + di->di_dict = (PyDictObject*)Py_NewRef(dict); di->di_used = dict->ma_used; di->len = dict->ma_used; if (itertype == &PyDictRevIterKey_Type || @@ -4077,8 +4047,7 @@ dictiter_iternextkey(dictiterobject *di) } di->di_pos = i+1; di->len--; - Py_INCREF(key); - return key; + return Py_NewRef(key); fail: di->di_dict = NULL; @@ -4177,8 +4146,7 @@ dictiter_iternextvalue(dictiterobject *di) } di->di_pos = i+1; di->len--; - Py_INCREF(value); - return value; + return Py_NewRef(value); fail: di->di_dict = NULL; @@ -4280,14 +4248,12 @@ dictiter_iternextitem(dictiterobject *di) } di->di_pos = i+1; di->len--; - Py_INCREF(key); - Py_INCREF(value); result = di->di_result; if (Py_REFCNT(result) == 1) { PyObject *oldkey = PyTuple_GET_ITEM(result, 0); PyObject *oldvalue = PyTuple_GET_ITEM(result, 1); - PyTuple_SET_ITEM(result, 0, key); /* steals reference */ - PyTuple_SET_ITEM(result, 1, value); /* steals reference */ + PyTuple_SET_ITEM(result, 0, Py_NewRef(key)); + PyTuple_SET_ITEM(result, 1, Py_NewRef(value)); Py_INCREF(result); Py_DECREF(oldkey); Py_DECREF(oldvalue); @@ -4301,8 +4267,8 @@ dictiter_iternextitem(dictiterobject *di) result = PyTuple_New(2); if (result == NULL) return NULL; - PyTuple_SET_ITEM(result, 0, key); /* steals reference */ - PyTuple_SET_ITEM(result, 1, value); /* steals reference */ + PyTuple_SET_ITEM(result, 0, Py_NewRef(key)); + PyTuple_SET_ITEM(result, 1, Py_NewRef(value)); } return result; @@ -4406,22 +4372,18 @@ dictreviter_iternext(dictiterobject *di) di->len--; if (Py_IS_TYPE(di, &PyDictRevIterKey_Type)) { - Py_INCREF(key); - return key; + return Py_NewRef(key); } else if (Py_IS_TYPE(di, &PyDictRevIterValue_Type)) { - Py_INCREF(value); - return value; + return Py_NewRef(value); } else if (Py_IS_TYPE(di, &PyDictRevIterItem_Type)) { - Py_INCREF(key); - Py_INCREF(value); result = di->di_result; if (Py_REFCNT(result) == 1) { PyObject *oldkey = PyTuple_GET_ITEM(result, 0); PyObject *oldvalue = PyTuple_GET_ITEM(result, 1); - PyTuple_SET_ITEM(result, 0, key); /* steals reference */ - PyTuple_SET_ITEM(result, 1, value); /* steals reference */ + PyTuple_SET_ITEM(result, 0, Py_NewRef(key)); + PyTuple_SET_ITEM(result, 1, Py_NewRef(value)); Py_INCREF(result); Py_DECREF(oldkey); Py_DECREF(oldvalue); @@ -4436,8 +4398,8 @@ dictreviter_iternext(dictiterobject *di) if (result == NULL) { return NULL; } - PyTuple_SET_ITEM(result, 0, key); /* steals reference */ - PyTuple_SET_ITEM(result, 1, value); /* steals reference */ + PyTuple_SET_ITEM(result, 0, Py_NewRef(key)); + PyTuple_SET_ITEM(result, 1, Py_NewRef(value)); } return result; } @@ -4484,7 +4446,6 @@ dictiter_reduce(dictiterobject *di, PyObject *Py_UNUSED(ignored)) /* copy the iterator state */ dictiterobject tmp = *di; Py_XINCREF(tmp.di_dict); - PyObject *list = PySequence_List((PyObject*)&tmp); Py_XDECREF(tmp.di_dict); if (list == NULL) { @@ -4566,8 +4527,7 @@ _PyDictView_New(PyObject *dict, PyTypeObject *type) dv = PyObject_GC_New(_PyDictViewObject, type); if (dv == NULL) return NULL; - Py_INCREF(dict); - dv->dv_dict = (PyDictObject *)dict; + dv->dv_dict = (PyDictObject *)Py_NewRef(dict); _PyObject_GC_TRACK(dv); return (PyObject *)dv; } @@ -4678,8 +4638,7 @@ dictview_richcompare(PyObject *self, PyObject *other, int op) if (ok < 0) return NULL; result = ok ? Py_True : Py_False; - Py_INCREF(result); - return result; + return Py_NewRef(result); } static PyObject * @@ -5445,8 +5404,7 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, } } PyObject *old_value = values->values[ix]; - Py_XINCREF(value); - values->values[ix] = value; + values->values[ix] = Py_XNewRef(value); if (old_value == NULL) { if (value == NULL) { PyErr_Format(PyExc_AttributeError, @@ -5508,8 +5466,7 @@ _PyObject_GetInstanceAttribute(PyObject *obj, PyDictValues *values, return NULL; } PyObject *value = values->values[ix]; - Py_XINCREF(value); - return value; + return Py_XNewRef(value); } int @@ -5652,8 +5609,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context) } } } - Py_XINCREF(dict); - return dict; + return Py_XNewRef(dict); } int From e0ab5b80e42f96dbecadadbc202c792b78540824 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 10 Nov 2022 09:03:57 -0700 Subject: [PATCH 06/29] gh-90110: Update the C-analyzer Tool (gh-99307) --- Modules/zlibmodule.c | 2 +- Tools/c-analyzer/cpython/_parser.py | 6 ++ Tools/c-analyzer/cpython/globals-to-fix.tsv | 91 +-------------------- Tools/c-analyzer/cpython/ignored.tsv | 5 ++ 4 files changed, 14 insertions(+), 90 deletions(-) diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 2a490ed5d183a7..30c2515f61f7e4 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -1718,7 +1718,7 @@ ZlibDecompressor__new__(PyTypeObject *cls, PyObject *kwargs) { static char *keywords[] = {"wbits", "zdict", NULL}; - static char *format = "|iO:_ZlibDecompressor"; + static const char * const format = "|iO:_ZlibDecompressor"; int wbits = MAX_WBITS; PyObject *zdict = NULL; zlibstate *state = PyType_GetModuleState(cls); diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 78241f0ea08ac8..769e27432f2837 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -82,6 +82,10 @@ def clean_lines(text): # generated Python/deepfreeze/*.c Python/frozen_modules/*.h +Python/generated_cases.c.h + +# not actually source +Python/bytecodes.c # @end=conf@ ''') @@ -285,6 +289,7 @@ def clean_lines(text): SAME = { _abs('Include/*.h'): [_abs('Include/cpython/')], + _abs('Python/ceval.c'): ['Python/generated_cases.c.h'], } MAX_SIZES = { @@ -311,6 +316,7 @@ def clean_lines(text): _abs('Python/frozen_modules/*.h'): (20_000, 500), _abs('Python/opcode_targets.h'): (10_000, 500), _abs('Python/stdlib_module_names.h'): (5_000, 500), + _abs('Python/importlib.h'): (200_000, 5000), # These large files are currently ignored (see above). _abs('Modules/_ssl_data.h'): (80_000, 10_000), diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 56e499dcf98836..4cd29a8a0b0cb6 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -304,20 +304,14 @@ Objects/sliceobject.c - _Py_EllipsisObject - # manually cached PyUnicodeObject Python/ast_unparse.c - _str_replace_inf - -# holds statically-initialized strings -Objects/typeobject.c - slotdefs - - # other Objects/typeobject.c object___reduce_ex___impl objreduce - -Objects/unicodeobject.c - _string_module - -Objects/unicodeobject.c - interned - #----------------------- # other # initialized once Python/context.c - _token_missing - -Python/fileutils.c - _Py_open_cloexec_works - Python/hamt.c - _empty_bitmap_node - Python/hamt.c - _empty_hamt - @@ -384,6 +378,7 @@ Python/perf_trampoline.c - perf_map_file - Objects/unicodeobject.c - ucnhash_capi - Parser/action_helpers.c _PyPegen_dummy_name cache - Python/dtoa.c - p5s - +Python/fileutils.c - _Py_open_cloexec_works - Python/fileutils.c - force_ascii - Python/fileutils.c set_inheritable ioctl_works - Python/import.c - import_lock - @@ -511,6 +506,7 @@ Modules/_testcapi/vectorcall.c - MethodDescriptorNopGet_Type - Modules/_testcapi/vectorcall.c - MethodDescriptor2_Type - Modules/itertoolsmodule.c - _grouper_type - Modules/itertoolsmodule.c - accumulate_type - +Modules/itertoolsmodule.c - batched_type - Modules/itertoolsmodule.c - chain_type - Modules/itertoolsmodule.c - combinations_type - Modules/itertoolsmodule.c - compress_type - @@ -701,89 +697,6 @@ Modules/xxmodule.c - ErrorObject - #----------------------- # cached - initialized once -# _Py_IDENTIFIER (global) -Modules/_asynciomodule.c - PyId___asyncio_running_event_loop__ - -Modules/_asynciomodule.c - PyId__asyncio_future_blocking - -Modules/_asynciomodule.c - PyId_add_done_callback - -Modules/_asynciomodule.c - PyId_call_soon - -Modules/_asynciomodule.c - PyId_cancel - -Modules/_asynciomodule.c - PyId_get_event_loop - -Modules/_asynciomodule.c - PyId_throw - -Modules/_datetimemodule.c - PyId_as_integer_ratio - -Modules/_datetimemodule.c - PyId_fromutc - -Modules/_datetimemodule.c - PyId_isoformat - -Modules/_datetimemodule.c - PyId_strftime - - -# _Py_IDENTIFIER (local) -Modules/_asynciomodule.c FutureObj_finalize PyId_call_exception_handler - -Modules/_asynciomodule.c FutureObj_finalize PyId_exception - -Modules/_asynciomodule.c FutureObj_finalize PyId_future - -Modules/_asynciomodule.c FutureObj_finalize PyId_message - -Modules/_asynciomodule.c FutureObj_finalize PyId_source_traceback - -Modules/_asynciomodule.c FutureObj_get_state PyId_CANCELLED - -Modules/_asynciomodule.c FutureObj_get_state PyId_FINISHED - -Modules/_asynciomodule.c FutureObj_get_state PyId_PENDING - -Modules/_asynciomodule.c TaskObj_finalize PyId_call_exception_handler - -Modules/_asynciomodule.c TaskObj_finalize PyId_message - -Modules/_asynciomodule.c TaskObj_finalize PyId_source_traceback - -Modules/_asynciomodule.c TaskObj_finalize PyId_task - -Modules/_asynciomodule.c future_init PyId_get_debug - -Modules/_asynciomodule.c get_future_loop PyId__loop - -Modules/_asynciomodule.c get_future_loop PyId_get_loop - -Modules/_asynciomodule.c register_task PyId_add - -Modules/_asynciomodule.c unregister_task PyId_discard - -Modules/_ctypes/_ctypes.c CDataType_from_param PyId__as_parameter_ - -Modules/_ctypes/_ctypes.c PyCArrayType_new PyId__length_ - -Modules/_ctypes/_ctypes.c PyCArrayType_new PyId__type_ - -Modules/_ctypes/_ctypes.c PyCFuncPtr_set_restype PyId__check_retval_ - -Modules/_ctypes/_ctypes.c PyCPointerType_new PyId__type_ - -Modules/_ctypes/_ctypes.c PyCPointerType_set_type PyId__type_ - -Modules/_ctypes/_ctypes.c PyCSimpleType_from_param PyId__as_parameter_ - -Modules/_ctypes/_ctypes.c PyCSimpleType_new PyId__type_ - -Modules/_ctypes/_ctypes.c StructUnionType_new PyId__abstract_ - -Modules/_ctypes/_ctypes.c StructUnionType_new PyId__fields_ - -Modules/_ctypes/_ctypes.c _build_result PyId___ctypes_from_outparam__ - -Modules/_ctypes/_ctypes.c _init_pos_args PyId__fields_ - -Modules/_ctypes/_ctypes.c c_char_p_from_param PyId__as_parameter_ - -Modules/_ctypes/_ctypes.c c_void_p_from_param PyId__as_parameter_ - -Modules/_ctypes/_ctypes.c c_wchar_p_from_param PyId__as_parameter_ - -Modules/_ctypes/_ctypes.c converters_from_argtypes PyId_from_param - -Modules/_ctypes/_ctypes.c make_funcptrtype_dict PyId__argtypes_ - -Modules/_ctypes/_ctypes.c make_funcptrtype_dict PyId__check_retval_ - -Modules/_ctypes/_ctypes.c make_funcptrtype_dict PyId__flags_ - -Modules/_ctypes/_ctypes.c make_funcptrtype_dict PyId__restype_ - -Modules/_ctypes/callproc.c ConvParam PyId__as_parameter_ - -Modules/_ctypes/callproc.c unpickle PyId___new__ - -Modules/_ctypes/callproc.c unpickle PyId___setstate__ - -Modules/_ctypes/stgdict.c MakeAnonFields PyId__anonymous_ - -Modules/_ctypes/stgdict.c PyCStructUnionType_update_stgdict PyId__pack_ - -Modules/_ctypes/stgdict.c PyCStructUnionType_update_stgdict PyId__swappedbytes_ - -Modules/_ctypes/stgdict.c PyCStructUnionType_update_stgdict PyId__use_broken_old_ctypes_structure_semantics_ - -Modules/_cursesmodule.c _curses_getwin PyId_read - -Modules/_cursesmodule.c _curses_window_putwin PyId_write - -Modules/_cursesmodule.c update_lines_cols PyId_COLS - -Modules/_cursesmodule.c update_lines_cols PyId_LINES - -Modules/_datetimemodule.c call_tzname PyId_tzname - -Modules/_datetimemodule.c date_strftime PyId_timetuple - -Modules/_datetimemodule.c date_today PyId_fromtimestamp - -Modules/_datetimemodule.c datetime_strptime PyId__strptime_datetime - -Modules/_datetimemodule.c make_Zreplacement PyId_replace - -Modules/_datetimemodule.c tzinfo_reduce PyId___getinitargs__ - -Modules/_elementtree.c _elementtree_Element_find_impl PyId_find - -Modules/_elementtree.c _elementtree_Element_findall_impl PyId_findall - -Modules/_elementtree.c _elementtree_Element_findtext_impl PyId_findtext - -Modules/_elementtree.c _elementtree_Element_iterfind_impl PyId_iterfind - -Modules/_elementtree.c expat_start_doctype_handler PyId_doctype - -Modules/_elementtree.c treebuilder_add_subelement PyId_append - -Modules/_elementtree.c treebuilder_flush_data PyId_tail - -Modules/_elementtree.c treebuilder_flush_data PyId_text - -Modules/_json.c _encoded_const PyId_false - -Modules/_json.c _encoded_const PyId_null - -Modules/_json.c _encoded_const PyId_true - -Modules/_json.c raise_errmsg PyId_JSONDecodeError - -Modules/_json.c raise_errmsg PyId_decoder - -Modules/ossaudiodev.c oss_exit PyId_close - - # manually cached PyUnicodeOjbect Modules/_asynciomodule.c - context_kwname - Modules/_ctypes/callproc.c _ctypes_get_errobj error_object_name - diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index ea16ed2f5e941f..6be61c94d87a3d 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -261,6 +261,10 @@ Modules/_testcapimodule.c test_capsule buffer - Modules/_testcapimodule.c test_empty_argparse kwlist - Modules/_testcapimodule.c test_structmembers_new keywords - Modules/_testcapimodule.c getargs_s_hash_int keywords - +Modules/_testcapimodule.c - g_dict_watch_events - +Modules/_testcapimodule.c - g_dict_watchers_installed - +Modules/_testcapimodule.c - g_type_modified_events - +Modules/_testcapimodule.c - g_type_watchers_installed - Modules/_testimportmultiple.c - _barmodule - Modules/_testimportmultiple.c - _foomodule - Modules/_testimportmultiple.c - _testimportmultiple - @@ -467,6 +471,7 @@ Objects/obmalloc.c - _PyObject - Objects/obmalloc.c - usedpools - Python/perf_trampoline.c - _Py_perfmap_callbacks - Objects/typeobject.c - name_op - +Objects/typeobject.c - slotdefs - Objects/unicodeobject.c - stripfuncnames - Objects/unicodeobject.c - utf7_category - Objects/unicodeobject.c unicode_decode_call_errorhandler_wchar argparse - From 882fdecbf824880e41ec67b89eee314186ab4b55 Mon Sep 17 00:00:00 2001 From: Carlo <34414634+csuriano23@users.noreply.github.com> Date: Thu, 10 Nov 2022 17:07:17 +0100 Subject: [PATCH 07/29] gh-99277: remove older version of `get_write_buffer_limits` (#99280) Co-authored-by: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> --- Lib/asyncio/sslproto.py | 6 ------ .../Library/2022-11-09-08-40-52.gh-issue-99277.J1P44O.rst | 1 + 2 files changed, 1 insertion(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-11-09-08-40-52.gh-issue-99277.J1P44O.rst diff --git a/Lib/asyncio/sslproto.py b/Lib/asyncio/sslproto.py index 5cb5cd35883f07..bbf9cad6bc7f86 100644 --- a/Lib/asyncio/sslproto.py +++ b/Lib/asyncio/sslproto.py @@ -199,12 +199,6 @@ def get_read_buffer_size(self): """Return the current size of the read buffer.""" return self._ssl_protocol._get_read_buffer_size() - def get_write_buffer_limits(self): - """Get the high and low watermarks for write flow control. - Return a tuple (low, high) where low and high are - positive number of bytes.""" - return self._ssl_protocol._transport.get_write_buffer_limits() - @property def _protocol_paused(self): # Required for sendfile fallback pause_writing/resume_writing logic diff --git a/Misc/NEWS.d/next/Library/2022-11-09-08-40-52.gh-issue-99277.J1P44O.rst b/Misc/NEWS.d/next/Library/2022-11-09-08-40-52.gh-issue-99277.J1P44O.rst new file mode 100644 index 00000000000000..f0a5390b03a7d0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-09-08-40-52.gh-issue-99277.J1P44O.rst @@ -0,0 +1 @@ +Remove older version of ``_SSLProtocolTransport.get_write_buffer_limits`` in :mod:`!asyncio.sslproto` From 8226fb957c88b879f9dc688626f33e8fbae544ac Mon Sep 17 00:00:00 2001 From: Vincent Fazio Date: Thu, 10 Nov 2022 10:26:42 -0600 Subject: [PATCH 08/29] gh-99204: Calculate base_executable by alternate names in POSIX venvs (GH-99206) Check to see if `base_executable` exists. If it does not, attempt to use known alternative names of the python binary to find an executable in the path specified by `home`. If no alternative is found, previous behavior is preserved. Signed-off-by: Vincent Fazio Signed-off-by: Vincent Fazio --- Lib/test/test_getpath.py | 31 +++++++++++++++++++ ...2-11-07-08-17-12.gh-issue-99204.Mf4hMD.rst | 4 +++ Modules/getpath.py | 19 ++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-11-07-08-17-12.gh-issue-99204.Mf4hMD.rst diff --git a/Lib/test/test_getpath.py b/Lib/test/test_getpath.py index 12d52442c554a1..7a22a9a8fb6043 100644 --- a/Lib/test/test_getpath.py +++ b/Lib/test/test_getpath.py @@ -382,6 +382,37 @@ def test_venv_changed_name_posix(self): actual = getpath(ns, expected) self.assertEqual(expected, actual) + def test_venv_changed_name_copy_posix(self): + "Test a venv --copies layout on *nix that lacks a distributed 'python'" + ns = MockPosixNamespace( + argv0="python", + PREFIX="/usr", + ENV_PATH="/venv/bin:/usr/bin", + ) + ns.add_known_xfile("/usr/bin/python9") + ns.add_known_xfile("/venv/bin/python") + ns.add_known_file("/usr/lib/python9.8/os.py") + ns.add_known_dir("/usr/lib/python9.8/lib-dynload") + ns.add_known_file("/venv/pyvenv.cfg", [ + r"home = /usr/bin" + ]) + expected = dict( + executable="/venv/bin/python", + prefix="/usr", + exec_prefix="/usr", + base_executable="/usr/bin/python9", + base_prefix="/usr", + base_exec_prefix="/usr", + module_search_paths_set=1, + module_search_paths=[ + "/usr/lib/python98.zip", + "/usr/lib/python9.8", + "/usr/lib/python9.8/lib-dynload", + ], + ) + actual = getpath(ns, expected) + self.assertEqual(expected, actual) + def test_symlink_normal_posix(self): "Test a 'standard' install layout via symlink on *nix" ns = MockPosixNamespace( diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-07-08-17-12.gh-issue-99204.Mf4hMD.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-07-08-17-12.gh-issue-99204.Mf4hMD.rst new file mode 100644 index 00000000000000..571cdd02cd5588 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-07-08-17-12.gh-issue-99204.Mf4hMD.rst @@ -0,0 +1,4 @@ +Fix calculation of :data:`sys._base_executable` when inside a POSIX virtual +environment using copies of the python binary when the base installation does +not provide the executable name used by the venv. Calculation will fall back to +alternative names ("python", "python."). diff --git a/Modules/getpath.py b/Modules/getpath.py index 90a6473f1e6ce7..d24b15259a973e 100644 --- a/Modules/getpath.py +++ b/Modules/getpath.py @@ -375,6 +375,25 @@ def search_up(prefix, *landmarks, test=isfile): pass if not base_executable: base_executable = joinpath(executable_dir, basename(executable)) + # It's possible "python" is executed from within a posix venv but that + # "python" is not available in the "home" directory as the standard + # `make install` does not create it and distros often do not provide it. + # + # In this case, try to fall back to known alternatives + if os_name != 'nt' and not isfile(base_executable): + base_exe = basename(executable) + for candidate in (DEFAULT_PROGRAM_NAME, f'python{VERSION_MAJOR}.{VERSION_MINOR}'): + candidate += EXE_SUFFIX if EXE_SUFFIX else '' + if base_exe == candidate: + continue + candidate = joinpath(executable_dir, candidate) + # Only set base_executable if the candidate exists. + # If no candidate succeeds, subsequent errors related to + # base_executable (like FileNotFoundError) remain in the + # context of the original executable name + if isfile(candidate): + base_executable = candidate + break break else: venv_prefix = None From 1aa012429d46e5b575fb939e846903f7f0c00dcb Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 10 Nov 2022 08:46:56 -0800 Subject: [PATCH 09/29] GH-99298: Don't perform jumps before error handling (GH-99299) --- ...2-11-09-12-07-24.gh-issue-99298.NeArAJ.rst | 2 ++ Python/bytecodes.c | 34 +++++++++++-------- Python/generated_cases.c.h | 34 +++++++++++-------- 3 files changed, 40 insertions(+), 30 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-11-09-12-07-24.gh-issue-99298.NeArAJ.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-09-12-07-24.gh-issue-99298.NeArAJ.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-09-12-07-24.gh-issue-99298.NeArAJ.rst new file mode 100644 index 00000000000000..8908bfaa8e25f6 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-09-12-07-24.gh-issue-99298.NeArAJ.rst @@ -0,0 +1,2 @@ +Fix an issue that could potentially cause incorrect error handling for some +bytecode instructions. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 26962973099247..f5e87a06c69a54 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1137,6 +1137,8 @@ dummy_func( PyObject *name = GETITEM(names, oparg); next_instr--; if (_Py_Specialize_StoreAttr(owner, next_instr, name)) { + // "undo" the rewind so end up in the correct handler: + next_instr++; goto error; } DISPATCH_SAME_OPARG(); @@ -1716,6 +1718,8 @@ dummy_func( PyObject *name = GETITEM(names, oparg>>1); next_instr--; if (_Py_Specialize_LoadAttr(owner, next_instr, name)) { + // "undo" the rewind so end up in the correct handler: + next_instr++; goto error; } DISPATCH_SAME_OPARG(); @@ -3104,7 +3108,6 @@ dummy_func( PyObject *callable = PEEK(2); DEOPT_IF(callable != (PyObject *)&PyUnicode_Type, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyObject *arg = TOP(); PyObject *res = PyObject_Str(arg); Py_DECREF(arg); @@ -3114,6 +3117,7 @@ dummy_func( if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); } @@ -3125,7 +3129,6 @@ dummy_func( PyObject *callable = PEEK(2); DEOPT_IF(callable != (PyObject *)&PyTuple_Type, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyObject *arg = TOP(); PyObject *res = PySequence_Tuple(arg); Py_DECREF(arg); @@ -3135,6 +3138,7 @@ dummy_func( if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); } @@ -3148,7 +3152,6 @@ dummy_func( PyTypeObject *tp = (PyTypeObject *)callable; DEOPT_IF(tp->tp_vectorcall == NULL, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); STACK_SHRINK(total_args); PyObject *res = tp->tp_vectorcall((PyObject *)tp, stack_pointer, total_args-kwnames_len, kwnames); @@ -3163,6 +3166,7 @@ dummy_func( if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); } @@ -3178,7 +3182,6 @@ dummy_func( DEOPT_IF(!PyCFunction_CheckExact(callable), CALL); DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_O, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); // This is slower but CPython promises to check all non-vectorcall // function calls. @@ -3197,6 +3200,7 @@ dummy_func( if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); } @@ -3212,7 +3216,6 @@ dummy_func( DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_FASTCALL, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); STACK_SHRINK(total_args); /* res = func(self, args, nargs) */ @@ -3237,6 +3240,7 @@ dummy_func( */ goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); } @@ -3251,7 +3255,6 @@ dummy_func( DEOPT_IF(PyCFunction_GET_FLAGS(callable) != (METH_FASTCALL | METH_KEYWORDS), CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); STACK_SHRINK(total_args); /* res = func(self, args, nargs, kwnames) */ _PyCFunctionFastWithKeywords cfunc = @@ -3276,6 +3279,7 @@ dummy_func( if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); } @@ -3291,7 +3295,6 @@ dummy_func( PyInterpreterState *interp = _PyInterpreterState_GET(); DEOPT_IF(callable != interp->callable_cache.len, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyObject *arg = TOP(); Py_ssize_t len_i = PyObject_Length(arg); if (len_i < 0) { @@ -3307,6 +3310,7 @@ dummy_func( if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); } // stack effect: (__0, __array[oparg] -- ) @@ -3321,7 +3325,6 @@ dummy_func( PyInterpreterState *interp = _PyInterpreterState_GET(); DEOPT_IF(callable != interp->callable_cache.isinstance, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyObject *cls = POP(); PyObject *inst = TOP(); int retval = PyObject_IsInstance(inst, cls); @@ -3340,6 +3343,7 @@ dummy_func( if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); } // stack effect: (__0, __array[oparg] -- ) @@ -3353,9 +3357,6 @@ dummy_func( PyObject *list = SECOND(); DEOPT_IF(!PyList_Check(list), CALL); STAT_INC(CALL, hit); - // CALL + POP_TOP - JUMPBY(INLINE_CACHE_ENTRIES_CALL + 1); - assert(_Py_OPCODE(next_instr[-1]) == POP_TOP); PyObject *arg = POP(); if (_PyList_AppendTakeRef((PyListObject *)list, arg) < 0) { goto error; @@ -3363,6 +3364,9 @@ dummy_func( STACK_SHRINK(2); Py_DECREF(list); Py_DECREF(callable); + // CALL + POP_TOP + JUMPBY(INLINE_CACHE_ENTRIES_CALL + 1); + assert(_Py_OPCODE(next_instr[-1]) == POP_TOP); } // stack effect: (__0, __array[oparg] -- ) @@ -3380,7 +3384,6 @@ dummy_func( PyObject *self = SECOND(); DEOPT_IF(!Py_IS_TYPE(self, callable->d_common.d_type), CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyCFunction cfunc = meth->ml_meth; // This is slower but CPython promises to check all non-vectorcall // function calls. @@ -3398,6 +3401,7 @@ dummy_func( if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); } @@ -3414,7 +3418,6 @@ dummy_func( PyObject *self = PEEK(total_args); DEOPT_IF(!Py_IS_TYPE(self, d_type), CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); int nargs = total_args-1; STACK_SHRINK(nargs); _PyCFunctionFastWithKeywords cfunc = @@ -3435,6 +3438,7 @@ dummy_func( if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); } @@ -3452,7 +3456,6 @@ dummy_func( DEOPT_IF(!Py_IS_TYPE(self, callable->d_common.d_type), CALL); DEOPT_IF(meth->ml_flags != METH_NOARGS, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyCFunction cfunc = meth->ml_meth; // This is slower but CPython promises to check all non-vectorcall // function calls. @@ -3469,6 +3472,7 @@ dummy_func( if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); } @@ -3486,7 +3490,6 @@ dummy_func( PyObject *self = PEEK(total_args); DEOPT_IF(!Py_IS_TYPE(self, callable->d_common.d_type), CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); _PyCFunctionFast cfunc = (_PyCFunctionFast)(void(*)(void))meth->ml_meth; int nargs = total_args-1; @@ -3504,6 +3507,7 @@ dummy_func( if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 3424549b35346b..aa6d5bd66a9fbe 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1137,6 +1137,8 @@ PyObject *name = GETITEM(names, oparg); next_instr--; if (_Py_Specialize_StoreAttr(owner, next_instr, name)) { + // "undo" the rewind so end up in the correct handler: + next_instr++; goto error; } DISPATCH_SAME_OPARG(); @@ -1716,6 +1718,8 @@ PyObject *name = GETITEM(names, oparg>>1); next_instr--; if (_Py_Specialize_LoadAttr(owner, next_instr, name)) { + // "undo" the rewind so end up in the correct handler: + next_instr++; goto error; } DISPATCH_SAME_OPARG(); @@ -3104,7 +3108,6 @@ PyObject *callable = PEEK(2); DEOPT_IF(callable != (PyObject *)&PyUnicode_Type, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyObject *arg = TOP(); PyObject *res = PyObject_Str(arg); Py_DECREF(arg); @@ -3114,6 +3117,7 @@ if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3125,7 +3129,6 @@ PyObject *callable = PEEK(2); DEOPT_IF(callable != (PyObject *)&PyTuple_Type, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyObject *arg = TOP(); PyObject *res = PySequence_Tuple(arg); Py_DECREF(arg); @@ -3135,6 +3138,7 @@ if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3148,7 +3152,6 @@ PyTypeObject *tp = (PyTypeObject *)callable; DEOPT_IF(tp->tp_vectorcall == NULL, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); STACK_SHRINK(total_args); PyObject *res = tp->tp_vectorcall((PyObject *)tp, stack_pointer, total_args-kwnames_len, kwnames); @@ -3163,6 +3166,7 @@ if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3178,7 +3182,6 @@ DEOPT_IF(!PyCFunction_CheckExact(callable), CALL); DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_O, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); // This is slower but CPython promises to check all non-vectorcall // function calls. @@ -3197,6 +3200,7 @@ if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3212,7 +3216,6 @@ DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_FASTCALL, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); STACK_SHRINK(total_args); /* res = func(self, args, nargs) */ @@ -3237,6 +3240,7 @@ */ goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3251,7 +3255,6 @@ DEOPT_IF(PyCFunction_GET_FLAGS(callable) != (METH_FASTCALL | METH_KEYWORDS), CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); STACK_SHRINK(total_args); /* res = func(self, args, nargs, kwnames) */ _PyCFunctionFastWithKeywords cfunc = @@ -3276,6 +3279,7 @@ if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3291,7 +3295,6 @@ PyInterpreterState *interp = _PyInterpreterState_GET(); DEOPT_IF(callable != interp->callable_cache.len, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyObject *arg = TOP(); Py_ssize_t len_i = PyObject_Length(arg); if (len_i < 0) { @@ -3307,6 +3310,7 @@ if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); DISPATCH(); } @@ -3321,7 +3325,6 @@ PyInterpreterState *interp = _PyInterpreterState_GET(); DEOPT_IF(callable != interp->callable_cache.isinstance, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyObject *cls = POP(); PyObject *inst = TOP(); int retval = PyObject_IsInstance(inst, cls); @@ -3340,6 +3343,7 @@ if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); DISPATCH(); } @@ -3353,9 +3357,6 @@ PyObject *list = SECOND(); DEOPT_IF(!PyList_Check(list), CALL); STAT_INC(CALL, hit); - // CALL + POP_TOP - JUMPBY(INLINE_CACHE_ENTRIES_CALL + 1); - assert(_Py_OPCODE(next_instr[-1]) == POP_TOP); PyObject *arg = POP(); if (_PyList_AppendTakeRef((PyListObject *)list, arg) < 0) { goto error; @@ -3363,6 +3364,9 @@ STACK_SHRINK(2); Py_DECREF(list); Py_DECREF(callable); + // CALL + POP_TOP + JUMPBY(INLINE_CACHE_ENTRIES_CALL + 1); + assert(_Py_OPCODE(next_instr[-1]) == POP_TOP); DISPATCH(); } @@ -3380,7 +3384,6 @@ PyObject *self = SECOND(); DEOPT_IF(!Py_IS_TYPE(self, callable->d_common.d_type), CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyCFunction cfunc = meth->ml_meth; // This is slower but CPython promises to check all non-vectorcall // function calls. @@ -3398,6 +3401,7 @@ if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3414,7 +3418,6 @@ PyObject *self = PEEK(total_args); DEOPT_IF(!Py_IS_TYPE(self, d_type), CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); int nargs = total_args-1; STACK_SHRINK(nargs); _PyCFunctionFastWithKeywords cfunc = @@ -3435,6 +3438,7 @@ if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3452,7 +3456,6 @@ DEOPT_IF(!Py_IS_TYPE(self, callable->d_common.d_type), CALL); DEOPT_IF(meth->ml_flags != METH_NOARGS, CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); PyCFunction cfunc = meth->ml_meth; // This is slower but CPython promises to check all non-vectorcall // function calls. @@ -3469,6 +3472,7 @@ if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3486,7 +3490,6 @@ PyObject *self = PEEK(total_args); DEOPT_IF(!Py_IS_TYPE(self, callable->d_common.d_type), CALL); STAT_INC(CALL, hit); - JUMPBY(INLINE_CACHE_ENTRIES_CALL); _PyCFunctionFast cfunc = (_PyCFunctionFast)(void(*)(void))meth->ml_meth; int nargs = total_args-1; @@ -3504,6 +3507,7 @@ if (res == NULL) { goto error; } + JUMPBY(INLINE_CACHE_ENTRIES_CALL); CHECK_EVAL_BREAKER(); DISPATCH(); } From d094e422e99492f62d6b45057de00fb281480112 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Nov 2022 10:50:57 -0800 Subject: [PATCH 10/29] GH-98831: Remove all remaining DISPATCH() calls from bytecodes.c (#99271) Also mark those opcodes that have no stack effect as such. Co-authored-by: Brandt Bucher --- Python/bytecodes.c | 182 ++++++++++++++++++------------------- Python/generated_cases.c.h | 162 ++++++++++++++++++--------------- 2 files changed, 175 insertions(+), 169 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f5e87a06c69a54..4a90df6a23075a 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -68,7 +68,6 @@ do { \ #define JUMPBY(offset) ((void)0) #define GO_TO_INSTRUCTION(instname) ((void)0) #define DISPATCH_SAME_OPARG() ((void)0) -#define DISPATCH() ((void)0) #define inst(name, ...) case name: #define super(name) static int SUPER_##name @@ -102,10 +101,6 @@ dummy_func( switch (opcode) { - /* BEWARE! - It is essential that any operation that fails must goto error - and that all operation that succeed call DISPATCH() ! */ - // BEGIN BYTECODES // inst(NOP, (--)) { } @@ -150,16 +145,14 @@ dummy_func( super(LOAD_FAST__LOAD_CONST) = LOAD_FAST + LOAD_CONST; super(STORE_FAST__LOAD_FAST) = STORE_FAST + LOAD_FAST; super(STORE_FAST__STORE_FAST) = STORE_FAST + STORE_FAST; - super (LOAD_CONST__LOAD_FAST) = LOAD_CONST + LOAD_FAST; + super(LOAD_CONST__LOAD_FAST) = LOAD_CONST + LOAD_FAST; inst(POP_TOP, (value --)) { Py_DECREF(value); } - // stack effect: ( -- __0) - inst(PUSH_NULL) { - /* Use BASIC_PUSH as NULL is not a valid object pointer */ - BASIC_PUSH(NULL); + inst(PUSH_NULL, (-- res)) { + res = NULL; } inst(END_FOR, (value1, value2 --)) { @@ -807,11 +800,12 @@ dummy_func( Py_DECREF(receiver); SET_TOP(retval); JUMPBY(oparg); - DISPATCH(); } - assert(gen_status == PYGEN_NEXT); - assert(retval != NULL); - PUSH(retval); + else { + assert(gen_status == PYGEN_NEXT); + assert(retval != NULL); + PUSH(retval); + } } // stack effect: ( -- ) @@ -904,7 +898,6 @@ dummy_func( if (PyErr_GivenExceptionMatches(val, PyExc_StopAsyncIteration)) { Py_DECREF(val); Py_DECREF(POP()); - DISPATCH(); } else { PyObject *exc = Py_NewRef(PyExceptionInstance_Class(val)); @@ -926,12 +919,13 @@ dummy_func( Py_DECREF(POP()); // The last sent value. Py_DECREF(POP()); // The delegated sub-iterator. PUSH(value); - DISPATCH(); } - PyObject *exc_type = Py_NewRef(Py_TYPE(exc_value)); - PyObject *exc_traceback = PyException_GetTraceback(exc_value); - _PyErr_Restore(tstate, exc_type, Py_NewRef(exc_value), exc_traceback); - goto exception_unwind; + else { + PyObject *exc_type = Py_NewRef(Py_TYPE(exc_value)); + PyObject *exc_traceback = PyException_GetTraceback(exc_value); + _PyErr_Restore(tstate, exc_type, Py_NewRef(exc_value), exc_traceback); + goto exception_unwind; + } } inst(STOPITERATION_ERROR) { @@ -973,7 +967,6 @@ dummy_func( PyException_SetContext(error, exc); Py_DECREF(message); } - DISPATCH(); } @@ -1031,8 +1024,7 @@ dummy_func( goto error; } - // stack effect: ( -- ) - inst(DELETE_NAME) { + inst(DELETE_NAME, (--)) { PyObject *name = GETITEM(names, oparg); PyObject *ns = LOCALS(); int err; @@ -1042,6 +1034,7 @@ dummy_func( goto error; } err = PyObject_DelItem(ns, name); + // Can't use ERROR_IF here. if (err != 0) { format_exc_check_arg(tstate, PyExc_NameError, NAME_ERROR_MSG, @@ -1181,11 +1174,11 @@ dummy_func( goto error; } - // stack effect: ( -- ) - inst(DELETE_GLOBAL) { + inst(DELETE_GLOBAL, (--)) { PyObject *name = GETITEM(names, oparg); int err; err = PyDict_DelItem(GLOBALS(), name); + // Can't use ERROR_IF here. if (err != 0) { if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { format_exc_check_arg(tstate, PyExc_NameError, @@ -1365,18 +1358,13 @@ dummy_func( SET_TOP(Py_NewRef(res)); } - // stack effect: ( -- ) - inst(DELETE_FAST) { + inst(DELETE_FAST, (--)) { PyObject *v = GETLOCAL(oparg); - if (v != NULL) { - SETLOCAL(oparg, NULL); - DISPATCH(); - } - goto unbound_local_error; + ERROR_IF(v == NULL, unbound_local_error); + SETLOCAL(oparg, NULL); } - // stack effect: ( -- ) - inst(MAKE_CELL) { + inst(MAKE_CELL, (--)) { // "initial" is probably NULL but not if it's an arg (or set // via PyFrame_LocalsToFast() before MAKE_CELL has run). PyObject *initial = GETLOCAL(oparg); @@ -1387,17 +1375,17 @@ dummy_func( SETLOCAL(oparg, cell); } - // stack effect: ( -- ) - inst(DELETE_DEREF) { + inst(DELETE_DEREF, (--)) { PyObject *cell = GETLOCAL(oparg); PyObject *oldobj = PyCell_GET(cell); - if (oldobj != NULL) { - PyCell_SET(cell, NULL); - Py_DECREF(oldobj); - DISPATCH(); + // Can't use ERROR_IF here. + // Fortunately we don't need its superpower. + if (oldobj == NULL) { + format_exc_unbound(tstate, frame->f_code, oparg); + goto error; } - format_exc_unbound(tstate, frame->f_code, oparg); - goto error; + PyCell_SET(cell, NULL); + Py_DECREF(oldobj); } // stack effect: ( -- __0) @@ -1760,15 +1748,15 @@ dummy_func( Py_DECREF(owner); PUSH(meth); } - JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); - DISPATCH(); } - PyObject *res = PyObject_GetAttr(owner, name); - if (res == NULL) { - goto error; + else { + PyObject *res = PyObject_GetAttr(owner, name); + if (res == NULL) { + goto error; + } + Py_DECREF(owner); + SET_TOP(res); } - Py_DECREF(owner); - SET_TOP(res); JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); } @@ -2426,21 +2414,23 @@ dummy_func( if (Py_IsTrue(cond)) { STACK_SHRINK(1); _Py_DECREF_NO_DEALLOC(cond); - DISPATCH(); } - if (Py_IsFalse(cond)) { + else if (Py_IsFalse(cond)) { JUMPBY(oparg); - DISPATCH(); } - err = PyObject_IsTrue(cond); - if (err > 0) { - STACK_SHRINK(1); - Py_DECREF(cond); + else { + err = PyObject_IsTrue(cond); + if (err > 0) { + STACK_SHRINK(1); + Py_DECREF(cond); + } + else if (err == 0) { + JUMPBY(oparg); + } + else { + goto error; + } } - else if (err == 0) - JUMPBY(oparg); - else - goto error; } // error: JUMP_IF_TRUE_OR_POP stack effect depends on jump flag @@ -2450,22 +2440,23 @@ dummy_func( if (Py_IsFalse(cond)) { STACK_SHRINK(1); _Py_DECREF_NO_DEALLOC(cond); - DISPATCH(); - } - if (Py_IsTrue(cond)) { - JUMPBY(oparg); - DISPATCH(); } - err = PyObject_IsTrue(cond); - if (err > 0) { + else if (Py_IsTrue(cond)) { JUMPBY(oparg); } - else if (err == 0) { - STACK_SHRINK(1); - Py_DECREF(cond); + else { + err = PyObject_IsTrue(cond); + if (err > 0) { + JUMPBY(oparg); + } + else if (err == 0) { + STACK_SHRINK(1); + Py_DECREF(cond); + } + else { + goto error; + } } - else - goto error; } // stack effect: ( -- ) @@ -2606,23 +2597,24 @@ dummy_func( if (next != NULL) { PUSH(next); JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); - DISPATCH(); } - if (_PyErr_Occurred(tstate)) { - if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { - goto error; - } - else if (tstate->c_tracefunc != NULL) { - call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame); + else { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + goto error; + } + else if (tstate->c_tracefunc != NULL) { + call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame); + } + _PyErr_Clear(tstate); } - _PyErr_Clear(tstate); + /* iterator ended normally */ + assert(_Py_OPCODE(next_instr[INLINE_CACHE_ENTRIES_FOR_ITER + oparg]) == END_FOR); + STACK_SHRINK(1); + Py_DECREF(iter); + /* Skip END_FOR */ + JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); } - /* iterator ended normally */ - assert(_Py_OPCODE(next_instr[INLINE_CACHE_ENTRIES_FOR_ITER + oparg]) == END_FOR); - STACK_SHRINK(1); - Py_DECREF(iter); - /* Skip END_FOR */ - JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); } // stack effect: ( -- __0) @@ -2637,7 +2629,7 @@ dummy_func( PyObject *next = PyList_GET_ITEM(seq, it->it_index++); PUSH(Py_NewRef(next)); JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); - DISPATCH(); + goto end_for_iter_list; // End of this instruction } it->it_seq = NULL; Py_DECREF(seq); @@ -2645,6 +2637,7 @@ dummy_func( STACK_SHRINK(1); Py_DECREF(it); JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); + end_for_iter_list: } // stack effect: ( -- __0) @@ -2659,15 +2652,16 @@ dummy_func( STACK_SHRINK(1); Py_DECREF(r); JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); - DISPATCH(); } - long value = (long)(r->start + - (unsigned long)(r->index++) * r->step); - if (_PyLong_AssignValue(&GETLOCAL(_Py_OPARG(next)), value) < 0) { - goto error; + else { + long value = (long)(r->start + + (unsigned long)(r->index++) * r->step); + if (_PyLong_AssignValue(&GETLOCAL(_Py_OPARG(next)), value) < 0) { + goto error; + } + // The STORE_FAST is already done. + JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + 1); } - // The STORE_FAST is already done. - JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + 1); } inst(FOR_ITER_GEN) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index aa6d5bd66a9fbe..a58a4e647af626 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -70,8 +70,10 @@ } TARGET(PUSH_NULL) { - /* Use BASIC_PUSH as NULL is not a valid object pointer */ - BASIC_PUSH(NULL); + PyObject *res; + res = NULL; + STACK_GROW(1); + POKE(1, res); DISPATCH(); } @@ -809,11 +811,12 @@ Py_DECREF(receiver); SET_TOP(retval); JUMPBY(oparg); - DISPATCH(); } - assert(gen_status == PYGEN_NEXT); - assert(retval != NULL); - PUSH(retval); + else { + assert(gen_status == PYGEN_NEXT); + assert(retval != NULL); + PUSH(retval); + } DISPATCH(); } @@ -904,7 +907,6 @@ if (PyErr_GivenExceptionMatches(val, PyExc_StopAsyncIteration)) { Py_DECREF(val); Py_DECREF(POP()); - DISPATCH(); } else { PyObject *exc = Py_NewRef(PyExceptionInstance_Class(val)); @@ -926,12 +928,14 @@ Py_DECREF(POP()); // The last sent value. Py_DECREF(POP()); // The delegated sub-iterator. PUSH(value); - DISPATCH(); } - PyObject *exc_type = Py_NewRef(Py_TYPE(exc_value)); - PyObject *exc_traceback = PyException_GetTraceback(exc_value); - _PyErr_Restore(tstate, exc_type, Py_NewRef(exc_value), exc_traceback); - goto exception_unwind; + else { + PyObject *exc_type = Py_NewRef(Py_TYPE(exc_value)); + PyObject *exc_traceback = PyException_GetTraceback(exc_value); + _PyErr_Restore(tstate, exc_type, Py_NewRef(exc_value), exc_traceback); + goto exception_unwind; + } + DISPATCH(); } TARGET(STOPITERATION_ERROR) { @@ -1040,6 +1044,7 @@ goto error; } err = PyObject_DelItem(ns, name); + // Can't use ERROR_IF here. if (err != 0) { format_exc_check_arg(tstate, PyExc_NameError, NAME_ERROR_MSG, @@ -1186,6 +1191,7 @@ PyObject *name = GETITEM(names, oparg); int err; err = PyDict_DelItem(GLOBALS(), name); + // Can't use ERROR_IF here. if (err != 0) { if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { format_exc_check_arg(tstate, PyExc_NameError, @@ -1369,11 +1375,9 @@ TARGET(DELETE_FAST) { PyObject *v = GETLOCAL(oparg); - if (v != NULL) { - SETLOCAL(oparg, NULL); - DISPATCH(); - } - goto unbound_local_error; + if (v == NULL) goto unbound_local_error; + SETLOCAL(oparg, NULL); + DISPATCH(); } TARGET(MAKE_CELL) { @@ -1391,13 +1395,15 @@ TARGET(DELETE_DEREF) { PyObject *cell = GETLOCAL(oparg); PyObject *oldobj = PyCell_GET(cell); - if (oldobj != NULL) { - PyCell_SET(cell, NULL); - Py_DECREF(oldobj); - DISPATCH(); + // Can't use ERROR_IF here. + // Fortunately we don't need its superpower. + if (oldobj == NULL) { + format_exc_unbound(tstate, frame->f_code, oparg); + goto error; } - format_exc_unbound(tstate, frame->f_code, oparg); - goto error; + PyCell_SET(cell, NULL); + Py_DECREF(oldobj); + DISPATCH(); } TARGET(LOAD_CLASSDEREF) { @@ -1760,15 +1766,15 @@ Py_DECREF(owner); PUSH(meth); } - JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); - DISPATCH(); } - PyObject *res = PyObject_GetAttr(owner, name); - if (res == NULL) { - goto error; + else { + PyObject *res = PyObject_GetAttr(owner, name); + if (res == NULL) { + goto error; + } + Py_DECREF(owner); + SET_TOP(res); } - Py_DECREF(owner); - SET_TOP(res); JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); DISPATCH(); } @@ -2427,21 +2433,23 @@ if (Py_IsTrue(cond)) { STACK_SHRINK(1); _Py_DECREF_NO_DEALLOC(cond); - DISPATCH(); } - if (Py_IsFalse(cond)) { + else if (Py_IsFalse(cond)) { JUMPBY(oparg); - DISPATCH(); } - err = PyObject_IsTrue(cond); - if (err > 0) { - STACK_SHRINK(1); - Py_DECREF(cond); + else { + err = PyObject_IsTrue(cond); + if (err > 0) { + STACK_SHRINK(1); + Py_DECREF(cond); + } + else if (err == 0) { + JUMPBY(oparg); + } + else { + goto error; + } } - else if (err == 0) - JUMPBY(oparg); - else - goto error; DISPATCH(); } @@ -2451,22 +2459,23 @@ if (Py_IsFalse(cond)) { STACK_SHRINK(1); _Py_DECREF_NO_DEALLOC(cond); - DISPATCH(); - } - if (Py_IsTrue(cond)) { - JUMPBY(oparg); - DISPATCH(); } - err = PyObject_IsTrue(cond); - if (err > 0) { + else if (Py_IsTrue(cond)) { JUMPBY(oparg); } - else if (err == 0) { - STACK_SHRINK(1); - Py_DECREF(cond); + else { + err = PyObject_IsTrue(cond); + if (err > 0) { + JUMPBY(oparg); + } + else if (err == 0) { + STACK_SHRINK(1); + Py_DECREF(cond); + } + else { + goto error; + } } - else - goto error; DISPATCH(); } @@ -2608,23 +2617,24 @@ if (next != NULL) { PUSH(next); JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); - DISPATCH(); } - if (_PyErr_Occurred(tstate)) { - if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { - goto error; - } - else if (tstate->c_tracefunc != NULL) { - call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame); + else { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + goto error; + } + else if (tstate->c_tracefunc != NULL) { + call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame); + } + _PyErr_Clear(tstate); } - _PyErr_Clear(tstate); + /* iterator ended normally */ + assert(_Py_OPCODE(next_instr[INLINE_CACHE_ENTRIES_FOR_ITER + oparg]) == END_FOR); + STACK_SHRINK(1); + Py_DECREF(iter); + /* Skip END_FOR */ + JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); } - /* iterator ended normally */ - assert(_Py_OPCODE(next_instr[INLINE_CACHE_ENTRIES_FOR_ITER + oparg]) == END_FOR); - STACK_SHRINK(1); - Py_DECREF(iter); - /* Skip END_FOR */ - JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); DISPATCH(); } @@ -2639,7 +2649,7 @@ PyObject *next = PyList_GET_ITEM(seq, it->it_index++); PUSH(Py_NewRef(next)); JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); - DISPATCH(); + goto end_for_iter_list; // End of this instruction } it->it_seq = NULL; Py_DECREF(seq); @@ -2647,6 +2657,7 @@ STACK_SHRINK(1); Py_DECREF(it); JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); + end_for_iter_list: DISPATCH(); } @@ -2661,15 +2672,16 @@ STACK_SHRINK(1); Py_DECREF(r); JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); - DISPATCH(); } - long value = (long)(r->start + - (unsigned long)(r->index++) * r->step); - if (_PyLong_AssignValue(&GETLOCAL(_Py_OPARG(next)), value) < 0) { - goto error; + else { + long value = (long)(r->start + + (unsigned long)(r->index++) * r->step); + if (_PyLong_AssignValue(&GETLOCAL(_Py_OPARG(next)), value) < 0) { + goto error; + } + // The STORE_FAST is already done. + JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + 1); } - // The STORE_FAST is already done. - JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + 1); DISPATCH(); } From d3d907a455ab7059b66bcf85d150dbf818683328 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Nov 2022 15:55:13 -0800 Subject: [PATCH 11/29] Remaining BINARY_OP family members --- Python/bytecodes.c | 21 ++++++--------------- Python/generated_cases.c.h | 21 +++++++++++---------- Tools/cases_generator/generate_cases.py | 4 ++++ 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 4a90df6a23075a..7c094bf2ab83b2 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -78,7 +78,7 @@ do { \ // Dummy variables for stack effects. static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub; -static PyObject *container, *start, *stop, *v; +static PyObject *container, *start, *stop, *v, *lhs, *rhs; static PyObject * dummy_func( @@ -3682,30 +3682,21 @@ dummy_func( PUSH(Py_NewRef(peek)); } - // stack effect: (__0 -- ) - inst(BINARY_OP_GENERIC) { - PyObject *rhs = POP(); - PyObject *lhs = TOP(); + inst(BINARY_OP_GENERIC, (lhs, rhs, unused/1 -- res)) { assert(0 <= oparg); assert((unsigned)oparg < Py_ARRAY_LENGTH(binary_ops)); assert(binary_ops[oparg]); - PyObject *res = binary_ops[oparg](lhs, rhs); + res = binary_ops[oparg](lhs, rhs); Py_DECREF(lhs); Py_DECREF(rhs); - SET_TOP(res); - if (res == NULL) { - goto error; - } - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); + ERROR_IF(res == NULL, error); } - // stack effect: (__0 -- ) - inst(BINARY_OP) { + // This always dispatches, so 'res' is unused. + inst(BINARY_OP, (lhs, rhs, unused/1 -- res)) { _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { assert(cframe.use_tracing == 0); - PyObject *lhs = SECOND(); - PyObject *rhs = TOP(); next_instr--; _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, &GETLOCAL(0)); DISPATCH_SAME_OPARG(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a58a4e647af626..7b73a5e49dd82c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3703,29 +3703,30 @@ TARGET(BINARY_OP_GENERIC) { PREDICTED(BINARY_OP_GENERIC); - PyObject *rhs = POP(); - PyObject *lhs = TOP(); + PyObject *rhs = PEEK(1); + PyObject *lhs = PEEK(2); + PyObject *res; assert(0 <= oparg); assert((unsigned)oparg < Py_ARRAY_LENGTH(binary_ops)); assert(binary_ops[oparg]); - PyObject *res = binary_ops[oparg](lhs, rhs); + res = binary_ops[oparg](lhs, rhs); Py_DECREF(lhs); Py_DECREF(rhs); - SET_TOP(res); - if (res == NULL) { - goto error; - } - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP); + if (res == NULL) goto pop_2_error; + STACK_SHRINK(1); + POKE(1, res); + next_instr += 1; DISPATCH(); } TARGET(BINARY_OP) { PREDICTED(BINARY_OP); + PyObject *rhs = PEEK(1); + PyObject *lhs = PEEK(2); + PyObject *res; _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { assert(cframe.use_tracing == 0); - PyObject *lhs = SECOND(); - PyObject *rhs = TOP(); next_instr--; _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, &GETLOCAL(0)); DISPATCH_SAME_OPARG(); diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 11ad8efd2a391c..5a1ee7d2a86ae7 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -124,6 +124,10 @@ def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, d f.write(f"{space}if ({cond}) goto {label};\n") else: f.write(line) + if always_exits(instr.block): + # None of the rest matters + return + # Stack effect noutputs = len(outputs) diff = noutputs - ninputs if diff > 0: From 0339a670f600b1913300c196829a1a6d94a59e72 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Nov 2022 15:55:28 -0800 Subject: [PATCH 12/29] Uniformly skip 'unused' effects --- Tools/cases_generator/generate_cases.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 5a1ee7d2a86ae7..284f20a59229f6 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -86,15 +86,16 @@ def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, d outputs = instr.outputs cache_offset = 0 for ceffect in cache: - if ceffect.name not in ("unused", "u", "_"): + if ceffect.name != "unused": bits = ceffect.size * 16 f.write(f"{indent} PyObject *{ceffect.name} = read{bits}(next_instr + {cache_offset});\n") cache_offset += ceffect.size # TODO: Is it better to count forward or backward? for i, effect in enumerate(reversed(stack), 1): - f.write(f"{indent} PyObject *{effect.name} = PEEK({i});\n") + if effect.name is not "unused": + f.write(f"{indent} PyObject *{effect.name} = PEEK({i});\n") for output in instr.outputs: - if output.name not in input_names: + if output.name not in input_names and output.name != "unused": f.write(f"{indent} PyObject *{output.name};\n") assert instr.block is not None blocklines = instr.block.to_text(dedent=dedent).splitlines(True) @@ -135,7 +136,7 @@ def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, d elif diff < 0: f.write(f"{indent} STACK_SHRINK({-diff});\n") for i, output in enumerate(reversed(outputs), 1): - if output.name not in (input_names): + if output.name not in input_names and output.name != "unused": f.write(f"{indent} POKE({i}, {output.name});\n") # Cache effect if cache_offset: From f3e7dd69b3c943dd8ae7535a4daa3d534ae56cc3 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Nov 2022 15:58:09 -0800 Subject: [PATCH 13/29] Remove superfluous asserts; fix one 'is not' --- Tools/cases_generator/generate_cases.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 284f20a59229f6..b686f776c90d99 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -39,7 +39,6 @@ def parse_cases( families: list[parser.Family] = [] while not psr.eof(): if inst := psr.inst_def(): - assert inst.block instrs.append(inst) elif sup := psr.super_def(): supers.append(sup) @@ -69,7 +68,6 @@ def always_exits(block: parser.Block) -> bool: def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, dedent: int = 0): - assert instr.block if dedent < 0: indent += " " * -dedent # Separate stack inputs from cache inputs @@ -92,12 +90,11 @@ def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, d cache_offset += ceffect.size # TODO: Is it better to count forward or backward? for i, effect in enumerate(reversed(stack), 1): - if effect.name is not "unused": + if effect.name != "unused": f.write(f"{indent} PyObject *{effect.name} = PEEK({i});\n") for output in instr.outputs: if output.name not in input_names and output.name != "unused": f.write(f"{indent} PyObject *{output.name};\n") - assert instr.block is not None blocklines = instr.block.to_text(dedent=dedent).splitlines(True) # Remove blank lines from ends while blocklines and not blocklines[0].strip(): @@ -146,8 +143,6 @@ def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, d def write_cases(f: TextIO, instrs: list[InstDef], supers: list[parser.Super]): predictions: set[str] = set() for instr in instrs: - assert isinstance(instr, InstDef) - assert instr.block is not None for target in re.findall(RE_PREDICTED, instr.block.text): predictions.add(target) indent = " " @@ -160,18 +155,15 @@ def write_cases(f: TextIO, instrs: list[InstDef], supers: list[parser.Super]): if instr.name in predictions: f.write(f"{indent} PREDICTED({instr.name});\n") write_instr(instr, predictions, indent, f) - assert instr.block if not always_exits(instr.block): f.write(f"{indent} DISPATCH();\n") # Write trailing '}' f.write(f"{indent}}}\n") for sup in supers: - assert isinstance(sup, parser.Super) components = [instr_index[name] for name in sup.ops] f.write(f"\n{indent}TARGET({sup.name}) {{\n") for i, instr in enumerate(components): - assert instr.block if i > 0: f.write(f"{indent} NEXTOPARG();\n") f.write(f"{indent} next_instr++;\n") From e3ff6ac9cf4bcce245fc90260559bb694cea71f2 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Nov 2022 16:04:44 -0800 Subject: [PATCH 14/29] Make BINARY_OP result unused --- Python/bytecodes.c | 4 ++-- Python/generated_cases.c.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7c094bf2ab83b2..1e553dc98422bb 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3692,8 +3692,8 @@ dummy_func( ERROR_IF(res == NULL, error); } - // This always dispatches, so 'res' is unused. - inst(BINARY_OP, (lhs, rhs, unused/1 -- res)) { + // This always dispatches, so the result is unused. + inst(BINARY_OP, (lhs, rhs, unused/1 -- unused)) { _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { assert(cframe.use_tracing == 0); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7b73a5e49dd82c..e4af5cc1600d56 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3723,7 +3723,6 @@ PREDICTED(BINARY_OP); PyObject *rhs = PEEK(1); PyObject *lhs = PEEK(2); - PyObject *res; _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { assert(cframe.use_tracing == 0); From 3db443a7eb70c140b2c0450721e86660f17e8e3f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Nov 2022 16:38:00 -0800 Subject: [PATCH 15/29] Fix parser for family() --- Tools/cases_generator/parser.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index a358e84b44d387..68d2d812f21422 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -236,20 +236,26 @@ def family_def(self) -> Family | None: if (tkn := self.expect(lx.IDENTIFIER)): if self.expect(lx.RPAREN): if self.expect(lx.EQUALS): + if not self.expect(lx.LBRACE): + raise self.make_syntax_error("Expected {") if members := self.members(): - if self.expect(lx.SEMI): + if self.expect(lx.RBRACE) and self.expect(lx.SEMI): return Family(tkn.text, members) return None def members(self) -> list[str] | None: here = self.getpos() if tkn := self.expect(lx.IDENTIFIER): - near = self.getpos() - if self.expect(lx.COMMA): - if rest := self.members(): - return [tkn.text] + rest - self.setpos(near) - return [tkn.text] + members = [tkn.text] + while self.expect(lx.COMMA): + if tkn := self.expect(lx.IDENTIFIER): + members.append(tkn.text) + else: + break + peek = self.peek() + if not peek or peek.kind != lx.RBRACE: + raise self.make_syntax_error("Expected comma or right paren") + return members self.setpos(here) return None @@ -290,5 +296,5 @@ def c_blob(self) -> list[lx.Token]: filename = None src = "if (x) { x.foo; // comment\n}" parser = Parser(src, filename) - x = parser.inst_def() + x = parser.inst_def() or parser.super_def() or parser.family_def() print(x) From c58a85a3def2aa776d0e93df4c5fc0bdec14baf6 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Nov 2022 16:38:20 -0800 Subject: [PATCH 16/29] Check family consistency --- Tools/cases_generator/generate_cases.py | 36 +++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index b686f776c90d99..9e4f78f9a76c37 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -67,7 +67,10 @@ def always_exits(block: parser.Block) -> bool: return line.startswith(("goto ", "return ", "DISPATCH", "GO_TO_", "Py_UNREACHABLE()")) -def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, dedent: int = 0): +def write_instr( + instr: InstDef, predictions: set[str], indent: str, f: TextIO, dedent: int = 0 +) -> int: + # Returns cache offset if dedent < 0: indent += " " * -dedent # Separate stack inputs from cache inputs @@ -124,7 +127,7 @@ def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, d f.write(line) if always_exits(instr.block): # None of the rest matters - return + return cache_offset # Stack effect noutputs = len(outputs) diff = noutputs - ninputs @@ -138,9 +141,12 @@ def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, d # Cache effect if cache_offset: f.write(f"{indent} next_instr += {cache_offset};\n") + return cache_offset -def write_cases(f: TextIO, instrs: list[InstDef], supers: list[parser.Super]): +def write_cases( + f: TextIO, instrs: list[InstDef], supers: list[parser.Super] +) -> dict[str, tuple[int, int, int]]: predictions: set[str] = set() for instr in instrs: for target in re.findall(RE_PREDICTED, instr.block.text): @@ -149,12 +155,14 @@ def write_cases(f: TextIO, instrs: list[InstDef], supers: list[parser.Super]): f.write(f"// This file is generated by {os.path.relpath(__file__)}\n") f.write(f"// Do not edit!\n") instr_index: dict[str, InstDef] = {} + effects_table: dict[str, tuple[int, int, int]] = {} # name -> (ninputs, noutputs, cache_offset) for instr in instrs: instr_index[instr.name] = instr f.write(f"\n{indent}TARGET({instr.name}) {{\n") if instr.name in predictions: f.write(f"{indent} PREDICTED({instr.name});\n") - write_instr(instr, predictions, indent, f) + cache_offset = write_instr(instr, predictions, indent, f) + effects_table[instr.name] = len(instr.inputs), len(instr.outputs), cache_offset if not always_exits(instr.block): f.write(f"{indent} DISPATCH();\n") # Write trailing '}' @@ -173,6 +181,8 @@ def write_cases(f: TextIO, instrs: list[InstDef], supers: list[parser.Super]): f.write(f"{indent} DISPATCH();\n") f.write(f"{indent}}}\n") + return effects_table + def main(): args = arg_parser.parse_args() @@ -193,12 +203,28 @@ def main(): file=sys.stderr, ) with eopen(args.output, "w") as f: - write_cases(f, instrs, supers) + effects_table = write_cases(f, instrs, supers) if not args.quiet: print( f"Wrote {ninstrs + nsupers} instructions to {args.output}", file=sys.stderr, ) + # Check that families have consistent effects + errors = 0 + for family in families: + head = effects_table[family.members[0]] + for member in family.members: + if effects_table[member] != head: + errors += 1 + print( + f"Family {family.name!r} has inconsistent effects (inputs, outputs, cache units):", + file=sys.stderr, + ) + print( + f" {family.members[0]} = {head}; {member} = {effects_table[member]}", + ) + if errors: + sys.exit(1) if __name__ == "__main__": From 756a41bfa5f9ea44c0bb5952e939941be933cdb4 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Nov 2022 16:38:35 -0800 Subject: [PATCH 17/29] Add first family (binary_op) --- Python/bytecodes.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 1e553dc98422bb..e58361ab0e0b8e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -191,6 +191,20 @@ dummy_func( ERROR_IF(res == NULL, error); } + family(binary_op) = { + BINARY_OP, + BINARY_OP_ADD_FLOAT, + BINARY_OP_ADD_INT, + BINARY_OP_ADD_UNICODE, + BINARY_OP_GENERIC, + // BINARY_OP_INPLACE_ADD_UNICODE, // This is an odd duck. + BINARY_OP_MULTIPLY_FLOAT, + BINARY_OP_MULTIPLY_INT, + BINARY_OP_SUBTRACT_FLOAT, + BINARY_OP_SUBTRACT_INT, + }; + + inst(BINARY_OP_MULTIPLY_INT, (left, right, unused/1 -- prod)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); @@ -3743,13 +3757,8 @@ dummy_func( ; } -// Families go below this point // +// Future families go below this point // -family(binary_op) = { - BINARY_OP, BINARY_OP_ADD_FLOAT, - BINARY_OP_ADD_INT, BINARY_OP_ADD_UNICODE, BINARY_OP_GENERIC, BINARY_OP_INPLACE_ADD_UNICODE, - BINARY_OP_MULTIPLY_FLOAT, BINARY_OP_MULTIPLY_INT, BINARY_OP_SUBTRACT_FLOAT, - BINARY_OP_SUBTRACT_INT }; family(binary_subscr) = { BINARY_SUBSCR, BINARY_SUBSCR_DICT, BINARY_SUBSCR_GETITEM, BINARY_SUBSCR_LIST_INT, BINARY_SUBSCR_TUPLE_INT }; From 48400aceabe0891e6b1bf198b98436bbd9573734 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Nov 2022 17:00:38 -0800 Subject: [PATCH 18/29] Add assert() to double-check cache struct size --- Python/bytecodes.c | 2 +- Python/generated_cases.c.h | 1 + Tools/cases_generator/generate_cases.py | 19 +++++++++++++++---- Tools/cases_generator/parser.py | 8 +++++++- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index e58361ab0e0b8e..3745640c0ef255 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -191,7 +191,7 @@ dummy_func( ERROR_IF(res == NULL, error); } - family(binary_op) = { + family(binary_op, INLINE_CACHE_ENTRIES_BINARY_OP) = { BINARY_OP, BINARY_OP_ADD_FLOAT, BINARY_OP_ADD_INT, diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e4af5cc1600d56..0236e3c77170c8 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3721,6 +3721,7 @@ TARGET(BINARY_OP) { PREDICTED(BINARY_OP); + assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1); PyObject *rhs = PEEK(1); PyObject *lhs = PEEK(2); _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 9e4f78f9a76c37..d0165317509126 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -67,8 +67,14 @@ def always_exits(block: parser.Block) -> bool: return line.startswith(("goto ", "return ", "DISPATCH", "GO_TO_", "Py_UNREACHABLE()")) +def find_cache_size(instr: InstDef, families: list[parser.Family]) -> str | None: + for family in families: + if instr.name == family.members[0]: + return family.size + + def write_instr( - instr: InstDef, predictions: set[str], indent: str, f: TextIO, dedent: int = 0 + instr: InstDef, predictions: set[str], indent: str, f: TextIO, dedent: int = 0, cache_size: str | None = None ) -> int: # Returns cache offset if dedent < 0: @@ -91,6 +97,8 @@ def write_instr( bits = ceffect.size * 16 f.write(f"{indent} PyObject *{ceffect.name} = read{bits}(next_instr + {cache_offset});\n") cache_offset += ceffect.size + if cache_size: + f.write(f"{indent} assert({cache_size} == {cache_offset});\n") # TODO: Is it better to count forward or backward? for i, effect in enumerate(reversed(stack), 1): if effect.name != "unused": @@ -145,7 +153,7 @@ def write_instr( def write_cases( - f: TextIO, instrs: list[InstDef], supers: list[parser.Super] + f: TextIO, instrs: list[InstDef], supers: list[parser.Super], families: list[parser.Family] ) -> dict[str, tuple[int, int, int]]: predictions: set[str] = set() for instr in instrs: @@ -161,7 +169,10 @@ def write_cases( f.write(f"\n{indent}TARGET({instr.name}) {{\n") if instr.name in predictions: f.write(f"{indent} PREDICTED({instr.name});\n") - cache_offset = write_instr(instr, predictions, indent, f) + cache_offset = write_instr( + instr, predictions, indent, f, + cache_size=find_cache_size(instr, families) + ) effects_table[instr.name] = len(instr.inputs), len(instr.outputs), cache_offset if not always_exits(instr.block): f.write(f"{indent} DISPATCH();\n") @@ -203,7 +214,7 @@ def main(): file=sys.stderr, ) with eopen(args.output, "w") as f: - effects_table = write_cases(f, instrs, supers) + effects_table = write_cases(f, instrs, supers, families) if not args.quiet: print( f"Wrote {ninstrs + nsupers} instructions to {args.output}", diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index 68d2d812f21422..1f855312aeba9f 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -108,6 +108,7 @@ class Super(Node): @dataclass class Family(Node): name: str + size: str # Variable giving the cache size in code units members: list[str] @@ -232,15 +233,20 @@ def ops(self) -> list[str] | None: @contextual def family_def(self) -> Family | None: if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "family": + size = None if self.expect(lx.LPAREN): if (tkn := self.expect(lx.IDENTIFIER)): + if self.expect(lx.COMMA): + if not (size := self.expect(lx.IDENTIFIER)): + raise self.make_syntax_error( + "Expected identifier") if self.expect(lx.RPAREN): if self.expect(lx.EQUALS): if not self.expect(lx.LBRACE): raise self.make_syntax_error("Expected {") if members := self.members(): if self.expect(lx.RBRACE) and self.expect(lx.SEMI): - return Family(tkn.text, members) + return Family(tkn.text, size.text if size else "", members) return None def members(self) -> list[str] | None: From ea163821e8a915c097416b85dac6bd6b0ffa5f87 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Nov 2022 15:12:50 -0800 Subject: [PATCH 19/29] Make family() macro variadic --- Python/bytecodes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3745640c0ef255..f11b7163935e81 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -71,7 +71,7 @@ do { \ #define inst(name, ...) case name: #define super(name) static int SUPER_##name -#define family(name) static int family_##name +#define family(name, ...) static int family_##name #define NAME_ERROR_MSG \ "name '%.200s' is not defined" From 205f12ecaa288edac54c3fb6955bf64184ff67e7 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Nov 2022 15:39:32 -0800 Subject: [PATCH 20/29] Show correct lineno on error; get rid of eopen() --- Tools/cases_generator/generate_cases.py | 27 ++++++++++--------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index d0165317509126..2efaea60ad388a 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -21,23 +21,21 @@ arg_parser.add_argument("-q", "--quiet", action="store_true") -def eopen(filename: str, mode: str = "r") -> TextIO: - if filename == "-": - if "r" in mode: - return sys.stdin - else: - return sys.stdout - return cast(TextIO, open(filename, mode)) - - def parse_cases( src: str, filename: str|None = None ) -> tuple[list[InstDef], list[parser.Super], list[parser.Family]]: psr = parser.Parser(src, filename=filename) + # Skip until BEGIN marker + while tkn := psr.next(raw=True): + if tkn.text == "// BEGIN BYTECODES //": + break + else: + raise psr.make_syntax_error(f"Couldn't find {text!r} in {psr.filename}") instrs: list[InstDef] = [] supers: list[parser.Super] = [] families: list[parser.Family] = [] - while not psr.eof(): + # Parse until END marker + while not psr.eof() and psr.peek(raw=True).text != "// END BYTECODES //": if inst := psr.inst_def(): instrs.append(inst) elif sup := psr.super_def(): @@ -197,11 +195,8 @@ def write_cases( def main(): args = arg_parser.parse_args() - with eopen(args.input) as f: - srclines = f.read().splitlines() - begin = srclines.index("// BEGIN BYTECODES //") - end = srclines.index("// END BYTECODES //") - src = "\n".join(srclines[begin+1 : end]) + with open(args.input) as f: + src = f.read() instrs, supers, families = parse_cases(src, filename=args.input) ninstrs = nsupers = nfamilies = 0 if not args.quiet: @@ -213,7 +208,7 @@ def main(): f"and {nfamilies} families from {args.input}", file=sys.stderr, ) - with eopen(args.output, "w") as f: + with open(args.output, "w") as f: effects_table = write_cases(f, instrs, supers, families) if not args.quiet: print( From bdba4d2853dcd4d2014b89435a47f56dc24749e0 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Nov 2022 15:46:07 -0800 Subject: [PATCH 21/29] Kill -q flag --- Tools/cases_generator/generate_cases.py | 28 +++++++++++-------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 2efaea60ad388a..287b9de1d7da0f 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -18,7 +18,6 @@ arg_parser = argparse.ArgumentParser() arg_parser.add_argument("-i", "--input", type=str, default="Python/bytecodes.c") arg_parser.add_argument("-o", "--output", type=str, default="Python/generated_cases.c.h") -arg_parser.add_argument("-q", "--quiet", action="store_true") def parse_cases( @@ -198,23 +197,20 @@ def main(): with open(args.input) as f: src = f.read() instrs, supers, families = parse_cases(src, filename=args.input) - ninstrs = nsupers = nfamilies = 0 - if not args.quiet: - ninstrs = len(instrs) - nsupers = len(supers) - nfamilies = len(families) - print( - f"Read {ninstrs} instructions, {nsupers} supers, " - f"and {nfamilies} families from {args.input}", - file=sys.stderr, - ) + ninstrs = len(instrs) + nsupers = len(supers) + nfamilies = len(families) + print( + f"Read {ninstrs} instructions, {nsupers} supers, " + f"and {nfamilies} families from {args.input}", + file=sys.stderr, + ) with open(args.output, "w") as f: effects_table = write_cases(f, instrs, supers, families) - if not args.quiet: - print( - f"Wrote {ninstrs + nsupers} instructions to {args.output}", - file=sys.stderr, - ) + print( + f"Wrote {ninstrs + nsupers} instructions to {args.output}", + file=sys.stderr, + ) # Check that families have consistent effects errors = 0 for family in families: From bf1043179d6c7b63fa0b25cd4a5c960b2674ef56 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Nov 2022 21:47:37 -0800 Subject: [PATCH 22/29] Refactor generate_cases.py. Tweak output a teensy bit. --- Python/generated_cases.c.h | 3 +- Tools/cases_generator/generate_cases.py | 519 +++++++++++++++--------- Tools/cases_generator/parser.py | 5 +- 3 files changed, 326 insertions(+), 201 deletions(-) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 0236e3c77170c8..b68b93cf87ea47 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1,4 +1,5 @@ // This file is generated by Tools/cases_generator/generate_cases.py +// from Python/bytecodes.c // Do not edit! TARGET(NOP) { @@ -3721,7 +3722,7 @@ TARGET(BINARY_OP) { PREDICTED(BINARY_OP); - assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1); + static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size"); PyObject *rhs = PEEK(1); PyObject *lhs = PEEK(2); _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 287b9de1d7da0f..2fbc8577b2a00f 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -1,52 +1,331 @@ -"""Generate the main interpreter switch.""" +"""Generate the main interpreter switch. -# Write the cases to generated_cases.c.h, which is #included in ceval.c. - -# TODO: Reuse C generation framework from deepfreeze.py? +Reads the instruction definitions from bytecodes.c. +Writes the cases to generated_cases.c.h, which is #included in ceval.c. +""" import argparse import os import re import sys -from typing import TextIO, cast +import typing import parser -from parser import InstDef # TODO: Use parser.InstDef +DEFAULT_INPUT = "Python/bytecodes.c" +DEFAULT_OUTPUT = "Python/generated_cases.c.h" +BEGIN_MARKER = "// BEGIN BYTECODES //" +END_MARKER = "// END BYTECODES //" RE_PREDICTED = r"(?s)(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);" arg_parser = argparse.ArgumentParser() -arg_parser.add_argument("-i", "--input", type=str, default="Python/bytecodes.c") -arg_parser.add_argument("-o", "--output", type=str, default="Python/generated_cases.c.h") - - -def parse_cases( - src: str, filename: str|None = None -) -> tuple[list[InstDef], list[parser.Super], list[parser.Family]]: - psr = parser.Parser(src, filename=filename) - # Skip until BEGIN marker - while tkn := psr.next(raw=True): - if tkn.text == "// BEGIN BYTECODES //": - break - else: - raise psr.make_syntax_error(f"Couldn't find {text!r} in {psr.filename}") - instrs: list[InstDef] = [] - supers: list[parser.Super] = [] - families: list[parser.Family] = [] - # Parse until END marker - while not psr.eof() and psr.peek(raw=True).text != "// END BYTECODES //": - if inst := psr.inst_def(): - instrs.append(inst) - elif sup := psr.super_def(): - supers.append(sup) - elif fam := psr.family_def(): - families.append(fam) +arg_parser.add_argument("-i", "--input", type=str, default=DEFAULT_INPUT) +arg_parser.add_argument("-o", "--output", type=str, default=DEFAULT_OUTPUT) + + +class Analyzer: + """Parse input, analyze it, and write to output.""" + + filename: str + src: str + + def __init__(self, filename: str): + """Read the input file.""" + self.filename = filename + with open(filename) as f: + self.src = f.read() + + instrs: dict[str, parser.InstDef] + supers: dict[str, parser.Super] + families: dict[str, parser.Family] + + def parse(self) -> None: + """Parse the source text.""" + psr = parser.Parser(self.src, filename=self.filename) + + # Skip until begin marker + while tkn := psr.next(raw=True): + if tkn.text == BEGIN_MARKER: + break else: - raise psr.make_syntax_error(f"Unexpected token") - return instrs, supers, families + raise psr.make_syntax_error(f"Couldn't find {marker!r} in {psr.filename}") + + # Parse until end marker + self.instrs = {} + self.supers = {} + self.families = {} + while (tkn := psr.peek(raw=True)) and tkn.text != END_MARKER: + if instr := psr.inst_def(): + self.instrs[instr.name] = instr + elif super := psr.super_def(): + self.supers[super.name] = super + elif family := psr.family_def(): + self.families[family.name] = family + else: + raise psr.make_syntax_error(f"Unexpected token") + + print( + f"Read {len(self.instrs)} instructions, " + f"{len(self.supers)} supers, " + f"and {len(self.families)} families from {self.filename}", + file=sys.stderr, + ) + + def analyze(self) -> None: + """Analyze the inputs. + + Raises SystemExit if there is an error. + """ + self.find_predictions() + self.compute_cache_offsets() + self.compute_stack_inputs() + self.compute_stack_outputs() + self.map_families() + errors = self.check_families() + if errors: + sys.exit(f"Found {errors} errors") + + predictions: set[str] = set() + + def find_predictions(self) -> None: + """Find the instructions that need PREDICTED() labels.""" + self.predictions = set() + for instr in self.instrs.values(): + for target in re.findall(RE_PREDICTED, instr.block.text): + self.predictions.add(target) + + cache_offsets: dict[str, int] + + def compute_cache_offsets(self) -> None: + """Compute the amount of cache space used per instruction.""" + self.cache_offsets = {} + for instr in self.instrs.values(): + cache_offset = 0 + for effect in instr.inputs: + if isinstance(effect, parser.CacheEffect): + cache_offset += effect.size + self.cache_offsets[instr.name] = cache_offset + + stack_inputs: dict[str, int] + + def compute_stack_inputs(self) -> None: + """Compute the number of stack items consumed per instruction.""" + self.stack_inputs = {} + for instr in self.instrs.values(): + stack_input = 0 + for effect in instr.inputs: + if isinstance(effect, parser.StackEffect): + stack_input += 1 + self.stack_inputs[instr.name] = stack_input + + stack_outputs: dict[str, int] + + def compute_stack_outputs(self) -> None: + """Compute the number of stack items produced per instruction.""" + self.stack_outputs = {} + for instr in self.instrs.values(): + stack_output = len(instr.outputs) + self.stack_outputs[instr.name] = stack_output + + family: dict[str, parser.Family] # instruction name -> family + + def map_families(self) -> None: + """Make instruction names back to their family, if they have one.""" + self.family = {} + for family in self.families.values(): + for member in family.members: + self.family[member] = family + + def check_families(self) -> int: + """Check each family: + + - Must have at least 2 members + - All members must be known instructions + - All members must have the same cache, input and output effects + """ + errors = 0 + for family in self.families.values(): + if len(family.members) < 2: + print(f"Family {family.name!r} has insufficient members") + errors += 1 + members = [member for member in family.members if member in self.instrs] + if members != family.members: + unknown = set(family.members) - set(members) + print(f"Family {family.name!r} has unknown members: {unknown}") + errors += 1 + if len(members) < 2: + continue + head = members[0] + cache = self.cache_offsets[head] + input = self.stack_inputs[head] + output = self.stack_outputs[head] + for member in members[1:]: + c = self.cache_offsets[member] + i = self.stack_inputs[member] + o = self.stack_outputs[member] + if (c, i, o) != (cache, input, output): + errors += 1 + print( + f"Family {family.name!r} has inconsistent " + f"(cache, inputs, outputs) effects:", + file=sys.stderr, + ) + print( + f" {family.members[0]} = {(cache, input, output)}; " + f"{member} = {(c, i, o)}", + file=sys.stderr, + ) + return errors + + indent: str = " " * 8 + + def write_instructions(self, filename: str) -> None: + """Write instructions to output file.""" + indent = self.indent + with open(filename, "w") as f: + # Write provenance header + f.write(f"// This file is generated by {os.path.relpath(__file__)}\n") + f.write(f"// from {os.path.relpath(self.filename)}\n") + f.write(f"// Do not edit!\n") + + # Write regular instructions + for name, instr in self.instrs.items(): + f.write(f"\n{indent}TARGET({name}) {{\n") + if name in self.predictions: + f.write(f"{indent} PREDICTED({name});\n") + self.write_instr(f, instr) + if not always_exits(instr.block): + f.write(f"{indent} DISPATCH();\n") + f.write(f"{indent}}}\n") + + # Write super-instructions + for name, sup in self.supers.items(): + components = [self.instrs[name] for name in sup.ops] + f.write(f"\n{indent}TARGET({sup.name}) {{\n") + for i, instr in enumerate(components): + if i > 0: + f.write(f"{indent} NEXTOPARG();\n") + f.write(f"{indent} next_instr++;\n") + f.write(f"{indent} {{\n") + self.write_instr(f, instr, dedent=-4) + f.write(f" {indent}}}\n") + f.write(f"{indent} DISPATCH();\n") + f.write(f"{indent}}}\n") + + print( + f"Wrote {len(self.instrs)} instructions and " + f"{len(self.supers)} super-instructions to {filename}", + file=sys.stderr, + ) + + def write_instr( + self, f: typing.TextIO, instr: parser.InstDef, dedent: int = 0 + ) -> None: + """Write one instruction, sans prologue and epilogue.""" + indent = self.indent + if dedent < 0: + indent += " " * -dedent # DO WE NEED THIS? + + # Get cache offset and maybe assert that it is correct + cache_offset = self.cache_offsets.get(instr.name, 0) + if family := self.family.get(instr.name): + if instr.name == family.members[0]: + if cache_size := family.size: + f.write( + f"{indent} static_assert({cache_size} == " + f'{cache_offset}, "incorrect cache size");\n' + ) + + # Separate cache effects from input stack effects + cache = [ + effect for effect in instr.inputs if isinstance(effect, parser.CacheEffect) + ] + stack = [ + effect for effect in instr.inputs if isinstance(effect, parser.StackEffect) + ] + + # Write cache effect variable declarations + for ceffect in cache: + if ceffect.name != "unused": + bits = ceffect.size * 16 + f.write( + f"{indent} PyObject *{ceffect.name} = " + f"read{bits}(next_instr + {cache_offset});\n" + ) + + # Write input stack effect variable declarations and initializations + for i, seffect in enumerate(reversed(stack), 1): + if seffect.name != "unused": + f.write(f"{indent} PyObject *{seffect.name} = PEEK({i});\n") + + # Write output stack effect variable declarations + for seffect in instr.outputs: + if seffect.name != "unused": + f.write(f"{indent} PyObject *{seffect.name};\n") + + self.write_instr_body(f, instr, dedent, len(stack)) + + # Skip the rest if the block always exits + if always_exits(instr.block): + return + + # Write net stack growth/shrinkage + diff = len(instr.outputs) - len(stack) + if diff > 0: + f.write(f"{indent} STACK_GROW({diff});\n") + elif diff < 0: + f.write(f"{indent} STACK_SHRINK({-diff});\n") + + # Write output stack effect assignments + input_names = [seffect.name for seffect in stack] + for i, output in enumerate(reversed(instr.outputs), 1): + if output.name not in input_names and output.name != "unused": + f.write(f"{indent} POKE({i}, {output.name});\n") + + # Write cache effect + if cache_offset: + f.write(f"{indent} next_instr += {cache_offset};\n") + + def write_instr_body( + self, f: typing.TextIO, instr: parser.InstDef, dedent: int, ninputs: int + ) -> None: + """Write the instruction body.""" + + # Get lines of text with proper dedelt + blocklines = instr.block.to_text(dedent=dedent).splitlines(True) + + # Remove blank lines from both ends + while blocklines and not blocklines[0].strip(): + blocklines.pop(0) + while blocklines and not blocklines[-1].strip(): + blocklines.pop() + + # Remove leading '{' and trailing '}' + assert blocklines and blocklines[0].strip() == "{" + assert blocklines and blocklines[-1].strip() == "}" + blocklines.pop() + blocklines.pop(0) + + # Remove trailing blank lines + while blocklines and not blocklines[-1].strip(): + blocklines.pop() + + # Write the body, substituting a goto for ERROR_IF() + for line in blocklines: + if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line): + space, cond, label = m.groups() + # ERROR_IF() must remove the inputs from the stack. + # The code block is responsible for DECREF()ing them. + if ninputs: + f.write(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n") + else: + f.write(f"{space}if ({cond}) goto {label};\n") + else: + f.write(line) def always_exits(block: parser.Block) -> bool: + """Determine whether a block always ends in a return/goto/etc.""" text = block.text lines = text.splitlines() while lines and not lines[-1].strip(): @@ -58,175 +337,21 @@ def always_exits(block: parser.Block) -> bool: return False line = lines.pop().rstrip() # Indent must match exactly (TODO: Do something better) - if line[:12] != " "*12: + if line[:12] != " " * 12: return False line = line[12:] - return line.startswith(("goto ", "return ", "DISPATCH", "GO_TO_", "Py_UNREACHABLE()")) - - -def find_cache_size(instr: InstDef, families: list[parser.Family]) -> str | None: - for family in families: - if instr.name == family.members[0]: - return family.size - - -def write_instr( - instr: InstDef, predictions: set[str], indent: str, f: TextIO, dedent: int = 0, cache_size: str | None = None -) -> int: - # Returns cache offset - if dedent < 0: - indent += " " * -dedent - # Separate stack inputs from cache inputs - input_names: set[str] = set() - stack: list[parser.StackEffect] = [] - cache: list[parser.CacheEffect] = [] - for input in instr.inputs: - if isinstance(input, parser.StackEffect): - stack.append(input) - input_names.add(input.name) - else: - assert isinstance(input, parser.CacheEffect), input - cache.append(input) - outputs = instr.outputs - cache_offset = 0 - for ceffect in cache: - if ceffect.name != "unused": - bits = ceffect.size * 16 - f.write(f"{indent} PyObject *{ceffect.name} = read{bits}(next_instr + {cache_offset});\n") - cache_offset += ceffect.size - if cache_size: - f.write(f"{indent} assert({cache_size} == {cache_offset});\n") - # TODO: Is it better to count forward or backward? - for i, effect in enumerate(reversed(stack), 1): - if effect.name != "unused": - f.write(f"{indent} PyObject *{effect.name} = PEEK({i});\n") - for output in instr.outputs: - if output.name not in input_names and output.name != "unused": - f.write(f"{indent} PyObject *{output.name};\n") - blocklines = instr.block.to_text(dedent=dedent).splitlines(True) - # Remove blank lines from ends - while blocklines and not blocklines[0].strip(): - blocklines.pop(0) - while blocklines and not blocklines[-1].strip(): - blocklines.pop() - # Remove leading '{' and trailing '}' - assert blocklines and blocklines[0].strip() == "{" - assert blocklines and blocklines[-1].strip() == "}" - blocklines.pop() - blocklines.pop(0) - # Remove trailing blank lines - while blocklines and not blocklines[-1].strip(): - blocklines.pop() - # Write the body - ninputs = len(stack) - for line in blocklines: - if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line): - space, cond, label = m.groups() - # ERROR_IF() must remove the inputs from the stack. - # The code block is responsible for DECREF()ing them. - if ninputs: - f.write(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n") - else: - f.write(f"{space}if ({cond}) goto {label};\n") - else: - f.write(line) - if always_exits(instr.block): - # None of the rest matters - return cache_offset - # Stack effect - noutputs = len(outputs) - diff = noutputs - ninputs - if diff > 0: - f.write(f"{indent} STACK_GROW({diff});\n") - elif diff < 0: - f.write(f"{indent} STACK_SHRINK({-diff});\n") - for i, output in enumerate(reversed(outputs), 1): - if output.name not in input_names and output.name != "unused": - f.write(f"{indent} POKE({i}, {output.name});\n") - # Cache effect - if cache_offset: - f.write(f"{indent} next_instr += {cache_offset};\n") - return cache_offset - - -def write_cases( - f: TextIO, instrs: list[InstDef], supers: list[parser.Super], families: list[parser.Family] -) -> dict[str, tuple[int, int, int]]: - predictions: set[str] = set() - for instr in instrs: - for target in re.findall(RE_PREDICTED, instr.block.text): - predictions.add(target) - indent = " " - f.write(f"// This file is generated by {os.path.relpath(__file__)}\n") - f.write(f"// Do not edit!\n") - instr_index: dict[str, InstDef] = {} - effects_table: dict[str, tuple[int, int, int]] = {} # name -> (ninputs, noutputs, cache_offset) - for instr in instrs: - instr_index[instr.name] = instr - f.write(f"\n{indent}TARGET({instr.name}) {{\n") - if instr.name in predictions: - f.write(f"{indent} PREDICTED({instr.name});\n") - cache_offset = write_instr( - instr, predictions, indent, f, - cache_size=find_cache_size(instr, families) - ) - effects_table[instr.name] = len(instr.inputs), len(instr.outputs), cache_offset - if not always_exits(instr.block): - f.write(f"{indent} DISPATCH();\n") - # Write trailing '}' - f.write(f"{indent}}}\n") - - for sup in supers: - components = [instr_index[name] for name in sup.ops] - f.write(f"\n{indent}TARGET({sup.name}) {{\n") - for i, instr in enumerate(components): - if i > 0: - f.write(f"{indent} NEXTOPARG();\n") - f.write(f"{indent} next_instr++;\n") - f.write(f"{indent} {{\n") - write_instr(instr, predictions, indent, f, dedent=-4) - f.write(f" {indent}}}\n") - f.write(f"{indent} DISPATCH();\n") - f.write(f"{indent}}}\n") - - return effects_table + return line.startswith( + ("goto ", "return ", "DISPATCH", "GO_TO_", "Py_UNREACHABLE()") + ) def main(): - args = arg_parser.parse_args() - with open(args.input) as f: - src = f.read() - instrs, supers, families = parse_cases(src, filename=args.input) - ninstrs = len(instrs) - nsupers = len(supers) - nfamilies = len(families) - print( - f"Read {ninstrs} instructions, {nsupers} supers, " - f"and {nfamilies} families from {args.input}", - file=sys.stderr, - ) - with open(args.output, "w") as f: - effects_table = write_cases(f, instrs, supers, families) - print( - f"Wrote {ninstrs + nsupers} instructions to {args.output}", - file=sys.stderr, - ) - # Check that families have consistent effects - errors = 0 - for family in families: - head = effects_table[family.members[0]] - for member in family.members: - if effects_table[member] != head: - errors += 1 - print( - f"Family {family.name!r} has inconsistent effects (inputs, outputs, cache units):", - file=sys.stderr, - ) - print( - f" {family.members[0]} = {head}; {member} = {effects_table[member]}", - ) - if errors: - sys.exit(1) + """Parse command line, parse input, analyze, write output.""" + args = arg_parser.parse_args() # Prints message and sys.exit(2) on error + a = Analyzer(args.input) # Raises OSError if file not found + a.parse() # Raises SyntaxError on failure + a.analyze() # Prints messages and raises SystemExit on failure + a.write_instructions(args.output) # Raises OSError if file can't be written if __name__ == "__main__": diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index 1f855312aeba9f..e4df69993edb1c 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -58,18 +58,17 @@ class Block(Node): @dataclass class Effect(Node): - pass + name: str @dataclass class StackEffect(Effect): - name: str + pass # TODO: type, condition @dataclass class CacheEffect(Effect): - name: str size: int From 4cfcf77558f5da1c0375f01f15ad27f41ed7e9f4 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Nov 2022 08:32:36 -0800 Subject: [PATCH 23/29] Further refactor --- Tools/cases_generator/generate_cases.py | 340 ++++++++++++------------ 1 file changed, 166 insertions(+), 174 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 2fbc8577b2a00f..9d3fcf25fc6f1c 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -23,11 +23,137 @@ arg_parser.add_argument("-o", "--output", type=str, default=DEFAULT_OUTPUT) +# This is not a data class +class Instruction(parser.InstDef): + """An instruction with additional data and code.""" + + # Computed by constructor + always_exits: bool + cache_offset: int + cache_effects: list[parser.CacheEffect] + input_effects: list[parser.StackEffect] + output_effects: list[parser.StackEffect] + + # Set later + family: parser.Family | None = None + predicted: bool = False + + def __init__(self, inst: parser.InstDef): + super().__init__(inst.header, inst.block) + self.always_exits = always_exits(self.block) + self.cache_effects = [ + effect for effect in self.inputs if isinstance(effect, parser.CacheEffect) + ] + self.cache_offset = sum(c.size for c in self.cache_effects) + self.input_effects = [ + effect for effect in self.inputs if isinstance(effect, parser.StackEffect) + ] + self.output_effects = self.outputs # For consistency/completeness + + def write( + self, f: typing.TextIO, indent: str, dedent: int = 0 + ) -> None: + """Write one instruction, sans prologue and epilogue.""" + if dedent < 0: + indent += " " * -dedent # DO WE NEED THIS? + + # Get cache offset and maybe assert that it is correct + if family := self.family: + if self.name == family.members[0]: + if cache_size := family.size: + f.write( + f"{indent} static_assert({cache_size} == " + f'{self.cache_offset}, "incorrect cache size");\n' + ) + + # Write cache effect variable declarations + for ceffect in self.cache_effects: + if ceffect.name != "unused": + bits = ceffect.size * 16 + f.write( + f"{indent} PyObject *{ceffect.name} = " + f"read{bits}(next_instr + {self.cache_offset});\n" + ) + + # Write input stack effect variable declarations and initializations + for i, seffect in enumerate(reversed(self.input_effects), 1): + if seffect.name != "unused": + f.write(f"{indent} PyObject *{seffect.name} = PEEK({i});\n") + + # Write output stack effect variable declarations + for seffect in self.output_effects: + if seffect.name != "unused": + f.write(f"{indent} PyObject *{seffect.name};\n") + + self.write_body(f, indent, dedent) + + # Skip the rest if the block always exits + if always_exits(self.block): + return + + # Write net stack growth/shrinkage + diff = len(self.output_effects) - len(self.input_effects) + if diff > 0: + f.write(f"{indent} STACK_GROW({diff});\n") + elif diff < 0: + f.write(f"{indent} STACK_SHRINK({-diff});\n") + + # Write output stack effect assignments + input_names = [seffect.name for seffect in self.input_effects] + for i, output in enumerate(reversed(self.output_effects), 1): + if output.name not in input_names and output.name != "unused": + f.write(f"{indent} POKE({i}, {output.name});\n") + + # Write cache effect + if self.cache_offset: + f.write(f"{indent} next_instr += {self.cache_offset};\n") + + def write_body( + self, f: typing.TextIO, ndent: str, dedent: int + ) -> None: + """Write the instruction body.""" + + # Get lines of text with proper dedelt + blocklines = self.block.to_text(dedent=dedent).splitlines(True) + + # Remove blank lines from both ends + while blocklines and not blocklines[0].strip(): + blocklines.pop(0) + while blocklines and not blocklines[-1].strip(): + blocklines.pop() + + # Remove leading and trailing braces + assert blocklines and blocklines[0].strip() == "{" + assert blocklines and blocklines[-1].strip() == "}" + blocklines.pop() + blocklines.pop(0) + + # Remove trailing blank lines + while blocklines and not blocklines[-1].strip(): + blocklines.pop() + + # Write the body, substituting a goto for ERROR_IF() + for line in blocklines: + if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line): + space, cond, label = m.groups() + # ERROR_IF() must pop the inputs from the stack. + # The code block is responsible for DECREF()ing them. + # NOTE: If the label doesn't exist, just add it to ceval.c. + ninputs = len(self.input_effects) + if ninputs: + f.write(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n") + else: + f.write(f"{space}if ({cond}) goto {label};\n") + else: + f.write(line) + + class Analyzer: """Parse input, analyze it, and write to output.""" filename: str src: str + errors: int = 0 def __init__(self, filename: str): """Read the input file.""" @@ -35,7 +161,7 @@ def __init__(self, filename: str): with open(filename) as f: self.src = f.read() - instrs: dict[str, parser.InstDef] + instrs: dict[str, Instruction] supers: dict[str, parser.Super] families: dict[str, parser.Family] @@ -48,15 +174,15 @@ def parse(self) -> None: if tkn.text == BEGIN_MARKER: break else: - raise psr.make_syntax_error(f"Couldn't find {marker!r} in {psr.filename}") + raise psr.make_syntax_error(f"Couldn't find {BEGIN_MARKER!r} in {psr.filename}") # Parse until end marker self.instrs = {} self.supers = {} self.families = {} while (tkn := psr.peek(raw=True)) and tkn.text != END_MARKER: - if instr := psr.inst_def(): - self.instrs[instr.name] = instr + if inst := psr.inst_def(): + self.instrs[inst.name] = Instruction(inst) elif super := psr.super_def(): self.supers[super.name] = super elif family := psr.family_def(): @@ -77,64 +203,34 @@ def analyze(self) -> None: Raises SystemExit if there is an error. """ self.find_predictions() - self.compute_cache_offsets() - self.compute_stack_inputs() - self.compute_stack_outputs() self.map_families() - errors = self.check_families() - if errors: - sys.exit(f"Found {errors} errors") - - predictions: set[str] = set() + self.check_families() def find_predictions(self) -> None: """Find the instructions that need PREDICTED() labels.""" - self.predictions = set() for instr in self.instrs.values(): for target in re.findall(RE_PREDICTED, instr.block.text): - self.predictions.add(target) - - cache_offsets: dict[str, int] - - def compute_cache_offsets(self) -> None: - """Compute the amount of cache space used per instruction.""" - self.cache_offsets = {} - for instr in self.instrs.values(): - cache_offset = 0 - for effect in instr.inputs: - if isinstance(effect, parser.CacheEffect): - cache_offset += effect.size - self.cache_offsets[instr.name] = cache_offset - - stack_inputs: dict[str, int] - - def compute_stack_inputs(self) -> None: - """Compute the number of stack items consumed per instruction.""" - self.stack_inputs = {} - for instr in self.instrs.values(): - stack_input = 0 - for effect in instr.inputs: - if isinstance(effect, parser.StackEffect): - stack_input += 1 - self.stack_inputs[instr.name] = stack_input - - stack_outputs: dict[str, int] - - def compute_stack_outputs(self) -> None: - """Compute the number of stack items produced per instruction.""" - self.stack_outputs = {} - for instr in self.instrs.values(): - stack_output = len(instr.outputs) - self.stack_outputs[instr.name] = stack_output - - family: dict[str, parser.Family] # instruction name -> family + if target_instr := self.instrs.get(target): + target_instr.predicted = True + else: + print( + f"Unknown instruction {target!r} predicted in {instr.name!r}", + file=sys.stderr, + ) + self.errors += 1 def map_families(self) -> None: """Make instruction names back to their family, if they have one.""" - self.family = {} for family in self.families.values(): for member in family.members: - self.family[member] = family + if member_instr := self.instrs.get(member): + member_instr.family = family + else: + print( + f"Unknown instruction {member!r} referenced in family {family.name!r}", + file=sys.stderr, + ) + self.errors += 1 def check_families(self) -> int: """Check each family: @@ -143,28 +239,28 @@ def check_families(self) -> int: - All members must be known instructions - All members must have the same cache, input and output effects """ - errors = 0 for family in self.families.values(): if len(family.members) < 2: print(f"Family {family.name!r} has insufficient members") - errors += 1 + self.errors += 1 members = [member for member in family.members if member in self.instrs] if members != family.members: unknown = set(family.members) - set(members) print(f"Family {family.name!r} has unknown members: {unknown}") - errors += 1 + self.errors += 1 if len(members) < 2: continue - head = members[0] - cache = self.cache_offsets[head] - input = self.stack_inputs[head] - output = self.stack_outputs[head] + head = self.instrs[members[0]] + cache = head.cache_offset + input = len(head.input_effects) + output = len(head.output_effects) for member in members[1:]: - c = self.cache_offsets[member] - i = self.stack_inputs[member] - o = self.stack_outputs[member] + instr = self.instrs[member] + c = instr.cache_offset + i = len(instr.input_effects) + o = len(instr.output_effects) if (c, i, o) != (cache, input, output): - errors += 1 + self.errors += 1 print( f"Family {family.name!r} has inconsistent " f"(cache, inputs, outputs) effects:", @@ -175,13 +271,11 @@ def check_families(self) -> int: f"{member} = {(c, i, o)}", file=sys.stderr, ) - return errors - - indent: str = " " * 8 + self.errors += 1 def write_instructions(self, filename: str) -> None: """Write instructions to output file.""" - indent = self.indent + indent = " " * 8 with open(filename, "w") as f: # Write provenance header f.write(f"// This file is generated by {os.path.relpath(__file__)}\n") @@ -191,9 +285,9 @@ def write_instructions(self, filename: str) -> None: # Write regular instructions for name, instr in self.instrs.items(): f.write(f"\n{indent}TARGET({name}) {{\n") - if name in self.predictions: + if instr.predicted: f.write(f"{indent} PREDICTED({name});\n") - self.write_instr(f, instr) + instr.write(f, indent) if not always_exits(instr.block): f.write(f"{indent} DISPATCH();\n") f.write(f"{indent}}}\n") @@ -207,7 +301,7 @@ def write_instructions(self, filename: str) -> None: f.write(f"{indent} NEXTOPARG();\n") f.write(f"{indent} next_instr++;\n") f.write(f"{indent} {{\n") - self.write_instr(f, instr, dedent=-4) + instr.write(f, indent, dedent=-4) f.write(f" {indent}}}\n") f.write(f"{indent} DISPATCH();\n") f.write(f"{indent}}}\n") @@ -218,111 +312,6 @@ def write_instructions(self, filename: str) -> None: file=sys.stderr, ) - def write_instr( - self, f: typing.TextIO, instr: parser.InstDef, dedent: int = 0 - ) -> None: - """Write one instruction, sans prologue and epilogue.""" - indent = self.indent - if dedent < 0: - indent += " " * -dedent # DO WE NEED THIS? - - # Get cache offset and maybe assert that it is correct - cache_offset = self.cache_offsets.get(instr.name, 0) - if family := self.family.get(instr.name): - if instr.name == family.members[0]: - if cache_size := family.size: - f.write( - f"{indent} static_assert({cache_size} == " - f'{cache_offset}, "incorrect cache size");\n' - ) - - # Separate cache effects from input stack effects - cache = [ - effect for effect in instr.inputs if isinstance(effect, parser.CacheEffect) - ] - stack = [ - effect for effect in instr.inputs if isinstance(effect, parser.StackEffect) - ] - - # Write cache effect variable declarations - for ceffect in cache: - if ceffect.name != "unused": - bits = ceffect.size * 16 - f.write( - f"{indent} PyObject *{ceffect.name} = " - f"read{bits}(next_instr + {cache_offset});\n" - ) - - # Write input stack effect variable declarations and initializations - for i, seffect in enumerate(reversed(stack), 1): - if seffect.name != "unused": - f.write(f"{indent} PyObject *{seffect.name} = PEEK({i});\n") - - # Write output stack effect variable declarations - for seffect in instr.outputs: - if seffect.name != "unused": - f.write(f"{indent} PyObject *{seffect.name};\n") - - self.write_instr_body(f, instr, dedent, len(stack)) - - # Skip the rest if the block always exits - if always_exits(instr.block): - return - - # Write net stack growth/shrinkage - diff = len(instr.outputs) - len(stack) - if diff > 0: - f.write(f"{indent} STACK_GROW({diff});\n") - elif diff < 0: - f.write(f"{indent} STACK_SHRINK({-diff});\n") - - # Write output stack effect assignments - input_names = [seffect.name for seffect in stack] - for i, output in enumerate(reversed(instr.outputs), 1): - if output.name not in input_names and output.name != "unused": - f.write(f"{indent} POKE({i}, {output.name});\n") - - # Write cache effect - if cache_offset: - f.write(f"{indent} next_instr += {cache_offset};\n") - - def write_instr_body( - self, f: typing.TextIO, instr: parser.InstDef, dedent: int, ninputs: int - ) -> None: - """Write the instruction body.""" - - # Get lines of text with proper dedelt - blocklines = instr.block.to_text(dedent=dedent).splitlines(True) - - # Remove blank lines from both ends - while blocklines and not blocklines[0].strip(): - blocklines.pop(0) - while blocklines and not blocklines[-1].strip(): - blocklines.pop() - - # Remove leading '{' and trailing '}' - assert blocklines and blocklines[0].strip() == "{" - assert blocklines and blocklines[-1].strip() == "}" - blocklines.pop() - blocklines.pop(0) - - # Remove trailing blank lines - while blocklines and not blocklines[-1].strip(): - blocklines.pop() - - # Write the body, substituting a goto for ERROR_IF() - for line in blocklines: - if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line): - space, cond, label = m.groups() - # ERROR_IF() must remove the inputs from the stack. - # The code block is responsible for DECREF()ing them. - if ninputs: - f.write(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n") - else: - f.write(f"{space}if ({cond}) goto {label};\n") - else: - f.write(line) - def always_exits(block: parser.Block) -> bool: """Determine whether a block always ends in a return/goto/etc.""" @@ -351,6 +340,9 @@ def main(): a = Analyzer(args.input) # Raises OSError if file not found a.parse() # Raises SyntaxError on failure a.analyze() # Prints messages and raises SystemExit on failure + if a.errors: + sys.exit(f"Found {a.errors} errors") + a.write_instructions(args.output) # Raises OSError if file can't be written From 382e248b84e93306c3c3a6c8459c37ff6fbd77f7 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Nov 2022 07:50:00 -0800 Subject: [PATCH 24/29] Make {In,Out}putEffect unions instead of classes --- Tools/cases_generator/parser.py | 53 +++++++++++++++++---------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index e4df69993edb1c..a5f25f20436f36 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -57,26 +57,26 @@ class Block(Node): @dataclass -class Effect(Node): +class StackEffect(Node): name: str - - -@dataclass -class StackEffect(Effect): - pass # TODO: type, condition @dataclass -class CacheEffect(Effect): +class CacheEffect(Node): + name: str size: int +InputEffect = StackEffect | CacheEffect +OutputEffect = StackEffect + + @dataclass class InstHeader(Node): name: str - inputs: list[Effect] - outputs: list[Effect] + inputs: list[InputEffect] + outputs: list[OutputEffect] @dataclass @@ -89,13 +89,12 @@ def name(self) -> str: return self.header.name @property - def inputs(self) -> list[Effect]: + def inputs(self) -> list[InputEffect]: return self.header.inputs @property def outputs(self) -> list[StackEffect]: - # This is always true - return [x for x in self.header.outputs if isinstance(x, StackEffect)] + return self.header.outputs @dataclass @@ -125,7 +124,7 @@ def inst_def(self) -> InstDef | None: def inst_header(self) -> InstHeader | None: # inst(NAME) | inst(NAME, (inputs -- outputs)) # TODO: Error out when there is something unexpected. - # TODO: Make INST a keyword in the lexer.`` + # TODO: Make INST a keyword in the lexer. if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "inst": if (self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER))): @@ -141,26 +140,28 @@ def inst_header(self) -> InstHeader | None: return InstHeader(name, [], []) return None - def check_overlaps(self, inp: list[Effect], outp: list[Effect]): - for i, name in enumerate(inp): - for j, name2 in enumerate(outp): - if name == name2: + def check_overlaps(self, inputs: list[InputEffect], outputs: list[OutputEffect]): + # TODO: This belongs in generate_cases.py::Analyzer::analyze() + inputs = [inp for inp in inputs if isinstance(inp, StackEffect)] # Select only stack effects + for i, inp in enumerate(inputs): + for j, outp in enumerate(outputs): + if inp.name == outp.name and inp.name != "unused": if i != j: raise self.make_syntax_error( - f"Input {name!r} at pos {i} repeated in output at different pos {j}") + f"Input {inp.name!r} at pos {i} repeated in output at different pos {j}") break - def stack_effect(self) -> tuple[list[Effect], list[Effect]]: + def stack_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]: # '(' [inputs] '--' [outputs] ')' if self.expect(lx.LPAREN): - inp = self.inputs() or [] + inputs = self.inputs() or [] if self.expect(lx.MINUSMINUS): - outp = self.outputs() or [] + outputs = self.outputs() or [] if self.expect(lx.RPAREN): - return inp, outp + return inputs, outputs raise self.make_syntax_error("Expected stack effect") - def inputs(self) -> list[Effect] | None: + def inputs(self) -> list[InputEffect] | None: # input (',' input)* here = self.getpos() if inp := self.input(): @@ -174,7 +175,7 @@ def inputs(self) -> list[Effect] | None: return None @contextual - def input(self) -> Effect | None: + def input(self) -> InputEffect | None: # IDENTIFIER '/' INTEGER (CacheEffect) # IDENTIFIER (StackEffect) if (tkn := self.expect(lx.IDENTIFIER)): @@ -191,7 +192,7 @@ def input(self) -> Effect | None: else: return StackEffect(tkn.text) - def outputs(self) -> list[Effect] | None: + def outputs(self) -> list[OutputEffect] | None: # output (, output)* here = self.getpos() if outp := self.output(): @@ -205,7 +206,7 @@ def outputs(self) -> list[Effect] | None: return None @contextual - def output(self) -> Effect | None: + def output(self) -> OutputEffect | None: if (tkn := self.expect(lx.IDENTIFIER)): return StackEffect(tkn.text) From b5c8a9f314f45f5660d3f46ab8b67848172f4245 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Nov 2022 07:54:19 -0800 Subject: [PATCH 25/29] Fix some mypy errors --- Tools/cases_generator/generate_cases.py | 2 +- Tools/cases_generator/lexer.py | 6 +++--- Tools/cases_generator/plexer.py | 7 ++++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 9d3fcf25fc6f1c..0a536d8aadc743 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -232,7 +232,7 @@ def map_families(self) -> None: ) self.errors += 1 - def check_families(self) -> int: + def check_families(self) -> None: """Check each family: - Must have at least 2 members diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index c5320c03d54631..493a32e38166d7 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -115,7 +115,7 @@ def choice(*opts): matcher = re.compile(choice(id_re, number_re, str_re, char, newline, macro, comment_re, *operators.values())) letter = re.compile(r'[a-zA-Z_]') -keywords = ( +kwds = ( 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INLINE', 'INT', 'LONG', @@ -124,9 +124,9 @@ def choice(*opts): 'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE' ) -for name in keywords: +for name in kwds: globals()[name] = name -keywords = { name.lower() : name for name in keywords } +keywords = { name.lower() : name for name in kwds } def make_syntax_error( diff --git a/Tools/cases_generator/plexer.py b/Tools/cases_generator/plexer.py index 107d608152cebe..a73254ed5b1daa 100644 --- a/Tools/cases_generator/plexer.py +++ b/Tools/cases_generator/plexer.py @@ -3,7 +3,7 @@ class PLexer: - def __init__(self, src: str, filename: str|None = None): + def __init__(self, src: str, filename: str): self.src = src self.filename = filename self.tokens = list(lx.tokenize(self.src, filename=filename)) @@ -89,16 +89,17 @@ def make_syntax_error(self, message: str, tkn: Token|None = None) -> SyntaxError filename = sys.argv[1] if filename == "-c" and sys.argv[2:]: src = sys.argv[2] - filename = None + filename = "" else: with open(filename) as f: src = f.read() else: - filename = None + filename = "" src = "if (x) { x.foo; // comment\n}" p = PLexer(src, filename) while not p.eof(): tok = p.next(raw=True) + assert tok left = repr(tok) right = lx.to_text([tok]).rstrip() print(f"{left:40.40} {right}") From 387fbe1d85a25ed8b65e2a1a7f453b0dbe9f46d9 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Nov 2022 08:34:58 -0800 Subject: [PATCH 26/29] Move check_overlaps to generate_cases.py --- Tools/cases_generator/generate_cases.py | 16 +++++++++++++++- Tools/cases_generator/parser.py | 12 ------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 0a536d8aadc743..481b331649464f 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -40,6 +40,7 @@ class Instruction(parser.InstDef): def __init__(self, inst: parser.InstDef): super().__init__(inst.header, inst.block) + self.context = inst.context self.always_exits = always_exits(self.block) self.cache_effects = [ effect for effect in self.inputs if isinstance(effect, parser.CacheEffect) @@ -50,6 +51,18 @@ def __init__(self, inst: parser.InstDef): ] self.output_effects = self.outputs # For consistency/completeness + def check_overlaps(self, psr: parser.Parser) -> None: + for i, inp in enumerate(self.input_effects): + for j, outp in enumerate(self.output_effects): + if inp.name == outp.name and inp.name != "unused": + if i != j: + tkn = psr.tokens[ctx.begin] if (ctx := self.context) else None + raise psr.make_syntax_error( + f"Input {inp.name!r} at pos {i} repeated in output at different pos {j}", + tkn, + ) + break + def write( self, f: typing.TextIO, indent: str, dedent: int = 0 ) -> None: @@ -182,7 +195,8 @@ def parse(self) -> None: self.families = {} while (tkn := psr.peek(raw=True)) and tkn.text != END_MARKER: if inst := psr.inst_def(): - self.instrs[inst.name] = Instruction(inst) + self.instrs[inst.name] = instr = Instruction(inst) + instr.check_overlaps(psr) elif super := psr.super_def(): self.supers[super.name] = super elif family := psr.family_def(): diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index a5f25f20436f36..c511607fdf70ec 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -134,23 +134,11 @@ def inst_header(self) -> InstHeader | None: if self.expect(lx.RPAREN): if ((tkn := self.peek()) and tkn.kind == lx.LBRACE): - self.check_overlaps(inp, outp) return InstHeader(name, inp, outp) elif self.expect(lx.RPAREN): return InstHeader(name, [], []) return None - def check_overlaps(self, inputs: list[InputEffect], outputs: list[OutputEffect]): - # TODO: This belongs in generate_cases.py::Analyzer::analyze() - inputs = [inp for inp in inputs if isinstance(inp, StackEffect)] # Select only stack effects - for i, inp in enumerate(inputs): - for j, outp in enumerate(outputs): - if inp.name == outp.name and inp.name != "unused": - if i != j: - raise self.make_syntax_error( - f"Input {inp.name!r} at pos {i} repeated in output at different pos {j}") - break - def stack_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]: # '(' [inputs] '--' [outputs] ')' if self.expect(lx.LPAREN): From d6821184cf573e70812e608c8f9618c57fa9768d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Nov 2022 14:39:53 -0800 Subject: [PATCH 27/29] Fix cache effect when used --- Tools/cases_generator/generate_cases.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 481b331649464f..7c64ca0dfa464d 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -80,13 +80,18 @@ def write( ) # Write cache effect variable declarations + cache_offset = 0 for ceffect in self.cache_effects: if ceffect.name != "unused": + # TODO: if name is 'descr' use PyObject *descr = read_obj(...) bits = ceffect.size * 16 - f.write( - f"{indent} PyObject *{ceffect.name} = " - f"read{bits}(next_instr + {self.cache_offset});\n" - ) + f.write(f"{indent} uint{bits}_t {ceffect.name} = ") + if ceffect.size == 1: + f.write(f"*(next_instr + {cache_offset});\n") + else: + f.write(f"read_u{bits}(next_instr + {cache_offset});\n") + cache_offset += ceffect.size + assert cache_offset == self.cache_offset # Write input stack effect variable declarations and initializations for i, seffect in enumerate(reversed(self.input_effects), 1): From 14e75f1970d38101579c8e34c0de477d1e7381af Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Nov 2022 15:12:04 -0800 Subject: [PATCH 28/29] Demonstrate cache effect with BINARY_SUBSCR This was a bin subtle, esp. BINARY_SUBSCR_DICT (which plays games with refcounts) and BINARY_SUBSCR_GETITEM (which ends with a goto). --- Python/bytecodes.c | 62 +++++++++++++++----------------------- Python/generated_cases.c.h | 61 ++++++++++++++++++++----------------- 2 files changed, 59 insertions(+), 64 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f11b7163935e81..bbf51d8131f74c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -79,6 +79,7 @@ do { \ // Dummy variables for stack effects. static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub; static PyObject *container, *start, *stop, *v, *lhs, *rhs; +static PyObject *list, *tuple, *dict; static PyObject * dummy_func( @@ -323,7 +324,15 @@ dummy_func( ERROR_IF(sum == NULL, error); } - inst(BINARY_SUBSCR, (container, sub -- res)) { + family(binary_subscr, INLINE_CACHE_ENTRIES_BINARY_SUBSCR) = { + BINARY_SUBSCR, + BINARY_SUBSCR_DICT, + BINARY_SUBSCR_GETITEM, + BINARY_SUBSCR_LIST_INT, + BINARY_SUBSCR_TUPLE_INT, + }; + + inst(BINARY_SUBSCR, (container, sub, unused/4 -- res)) { _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { assert(cframe.use_tracing == 0); @@ -337,7 +346,6 @@ dummy_func( Py_DECREF(container); Py_DECREF(sub); ERROR_IF(res == NULL, error); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); } inst(BINARY_SLICE, (container, start, stop -- res)) { @@ -370,11 +378,8 @@ dummy_func( ERROR_IF(err, error); } - // stack effect: (__0 -- ) - inst(BINARY_SUBSCR_LIST_INT) { + inst(BINARY_SUBSCR_LIST_INT, (list, sub, unused/4 -- res)) { assert(cframe.use_tracing == 0); - PyObject *sub = TOP(); - PyObject *list = SECOND(); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR); @@ -385,21 +390,15 @@ dummy_func( Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0]; DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); - PyObject *res = PyList_GET_ITEM(list, index); + res = PyList_GET_ITEM(list, index); assert(res != NULL); Py_INCREF(res); - STACK_SHRINK(1); _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); - SET_TOP(res); Py_DECREF(list); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); } - // stack effect: (__0 -- ) - inst(BINARY_SUBSCR_TUPLE_INT) { + inst(BINARY_SUBSCR_TUPLE_INT, (tuple, sub, unused/4 -- res)) { assert(cframe.use_tracing == 0); - PyObject *sub = TOP(); - PyObject *tuple = SECOND(); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR); @@ -410,51 +409,40 @@ dummy_func( Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0]; DEOPT_IF(index >= PyTuple_GET_SIZE(tuple), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); - PyObject *res = PyTuple_GET_ITEM(tuple, index); + res = PyTuple_GET_ITEM(tuple, index); assert(res != NULL); Py_INCREF(res); - STACK_SHRINK(1); _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); - SET_TOP(res); Py_DECREF(tuple); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); } - // stack effect: (__0 -- ) - inst(BINARY_SUBSCR_DICT) { + inst(BINARY_SUBSCR_DICT, (dict, sub, unused/4 -- res)) { assert(cframe.use_tracing == 0); - PyObject *dict = SECOND(); - DEOPT_IF(!PyDict_CheckExact(SECOND()), BINARY_SUBSCR); + DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); - PyObject *sub = TOP(); - PyObject *res = PyDict_GetItemWithError(dict, sub); + res = PyDict_GetItemWithError(dict, sub); + Py_DECREF(dict); if (res == NULL) { if (!_PyErr_Occurred(tstate)) { _PyErr_SetKeyError(sub); } - goto error; + else { + Py_DECREF(sub); + } + ERROR_IF(1, error); } - Py_INCREF(res); - STACK_SHRINK(1); Py_DECREF(sub); - SET_TOP(res); - Py_DECREF(dict); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + Py_INCREF(res); } - // stack effect: (__0 -- ) - inst(BINARY_SUBSCR_GETITEM) { - PyObject *sub = TOP(); - PyObject *container = SECOND(); - _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; - uint32_t type_version = read_u32(cache->type_version); + inst(BINARY_SUBSCR_GETITEM, (container, sub, unused/1, type_version/2, func_version/1 -- unused)) { PyTypeObject *tp = Py_TYPE(container); DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR); assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE); PyObject *cached = ((PyHeapTypeObject *)tp)->_spec_cache.getitem; assert(PyFunction_Check(cached)); PyFunctionObject *getitem = (PyFunctionObject *)cached; - DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR); + DEOPT_IF(getitem->func_version != func_version, BINARY_SUBSCR); PyCodeObject *code = (PyCodeObject *)getitem->func_code; assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index b68b93cf87ea47..7d7f30e5ab6e34 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -301,6 +301,7 @@ TARGET(BINARY_SUBSCR) { PREDICTED(BINARY_SUBSCR); + static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 4, "incorrect cache size"); PyObject *sub = PEEK(1); PyObject *container = PEEK(2); PyObject *res; @@ -317,9 +318,9 @@ Py_DECREF(container); Py_DECREF(sub); if (res == NULL) goto pop_2_error; - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); STACK_SHRINK(1); POKE(1, res); + next_instr += 4; DISPATCH(); } @@ -367,9 +368,10 @@ } TARGET(BINARY_SUBSCR_LIST_INT) { + PyObject *sub = PEEK(1); + PyObject *list = PEEK(2); + PyObject *res; assert(cframe.use_tracing == 0); - PyObject *sub = TOP(); - PyObject *list = SECOND(); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR); @@ -380,21 +382,22 @@ Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0]; DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); - PyObject *res = PyList_GET_ITEM(list, index); + res = PyList_GET_ITEM(list, index); assert(res != NULL); Py_INCREF(res); - STACK_SHRINK(1); _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); - SET_TOP(res); Py_DECREF(list); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + STACK_SHRINK(1); + POKE(1, res); + next_instr += 4; DISPATCH(); } TARGET(BINARY_SUBSCR_TUPLE_INT) { + PyObject *sub = PEEK(1); + PyObject *tuple = PEEK(2); + PyObject *res; assert(cframe.use_tracing == 0); - PyObject *sub = TOP(); - PyObject *tuple = SECOND(); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR); @@ -405,51 +408,55 @@ Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0]; DEOPT_IF(index >= PyTuple_GET_SIZE(tuple), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); - PyObject *res = PyTuple_GET_ITEM(tuple, index); + res = PyTuple_GET_ITEM(tuple, index); assert(res != NULL); Py_INCREF(res); - STACK_SHRINK(1); _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); - SET_TOP(res); Py_DECREF(tuple); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + STACK_SHRINK(1); + POKE(1, res); + next_instr += 4; DISPATCH(); } TARGET(BINARY_SUBSCR_DICT) { + PyObject *sub = PEEK(1); + PyObject *dict = PEEK(2); + PyObject *res; assert(cframe.use_tracing == 0); - PyObject *dict = SECOND(); - DEOPT_IF(!PyDict_CheckExact(SECOND()), BINARY_SUBSCR); + DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); - PyObject *sub = TOP(); - PyObject *res = PyDict_GetItemWithError(dict, sub); + res = PyDict_GetItemWithError(dict, sub); + Py_DECREF(dict); if (res == NULL) { if (!_PyErr_Occurred(tstate)) { _PyErr_SetKeyError(sub); } - goto error; + else { + Py_DECREF(sub); + } + if (1) goto pop_2_error; } + Py_DECREF(sub); Py_INCREF(res); STACK_SHRINK(1); - Py_DECREF(sub); - SET_TOP(res); - Py_DECREF(dict); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + POKE(1, res); + next_instr += 4; DISPATCH(); } TARGET(BINARY_SUBSCR_GETITEM) { - PyObject *sub = TOP(); - PyObject *container = SECOND(); - _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; - uint32_t type_version = read_u32(cache->type_version); + uint32_t type_version = read_u32(next_instr + 1); + uint16_t func_version = *(next_instr + 3); + PyObject *sub = PEEK(1); + PyObject *container = PEEK(2); PyTypeObject *tp = Py_TYPE(container); DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR); assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE); PyObject *cached = ((PyHeapTypeObject *)tp)->_spec_cache.getitem; assert(PyFunction_Check(cached)); PyFunctionObject *getitem = (PyFunctionObject *)cached; - DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR); + DEOPT_IF(getitem->func_version != func_version, BINARY_SUBSCR); PyCodeObject *code = (PyCodeObject *)getitem->func_code; assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); From 1a5356b2ea8502a597cfc2ede2430916cb2dcd75 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Nov 2022 17:56:46 -0800 Subject: [PATCH 29/29] Fix test crashes by fiddling DECREF order in BINARY_SUBSCR_DICT --- Python/bytecodes.c | 9 ++++----- Python/generated_cases.c.h | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index bbf51d8131f74c..7da147261696f0 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -421,18 +421,17 @@ dummy_func( DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); res = PyDict_GetItemWithError(dict, sub); - Py_DECREF(dict); if (res == NULL) { if (!_PyErr_Occurred(tstate)) { _PyErr_SetKeyError(sub); } - else { - Py_DECREF(sub); - } + Py_DECREF(dict); + Py_DECREF(sub); ERROR_IF(1, error); } + Py_INCREF(res); // Do this before DECREF'ing dict, sub + Py_DECREF(dict); Py_DECREF(sub); - Py_INCREF(res); } inst(BINARY_SUBSCR_GETITEM, (container, sub, unused/1, type_version/2, func_version/1 -- unused)) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7d7f30e5ab6e34..01db10ad114356 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -427,18 +427,17 @@ DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); res = PyDict_GetItemWithError(dict, sub); - Py_DECREF(dict); if (res == NULL) { if (!_PyErr_Occurred(tstate)) { _PyErr_SetKeyError(sub); } - else { - Py_DECREF(sub); - } + Py_DECREF(dict); + Py_DECREF(sub); if (1) goto pop_2_error; } + Py_INCREF(res); // Do this before DECREF'ing dict, sub + Py_DECREF(dict); Py_DECREF(sub); - Py_INCREF(res); STACK_SHRINK(1); POKE(1, res); next_instr += 4;