From 6b2d840b6d270a9841498ae3e7f6cb2f70670d60 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 13 Jun 2022 10:46:12 +0100 Subject: [PATCH 1/2] Use a lookup table to reduce overhead of getting line numbers during tracing. --- Include/cpython/code.h | 4 +- Include/internal/pycore_code.h | 28 ++++++++++ ...2-06-13-10-48-09.gh-issue-93516.yJSait.rst | 2 + Objects/codeobject.c | 55 +++++++++++++++++++ Python/ceval.c | 16 +++--- Tools/scripts/deepfreeze.py | 2 + 6 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-06-13-10-48-09.gh-issue-93516.yJSait.rst diff --git a/Include/cpython/code.h b/Include/cpython/code.h index f544ea87406e25..f2f774de28329e 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -62,7 +62,8 @@ typedef uint16_t _Py_CODEUNIT; PyObject *co_exceptiontable; /* Byte string encoding exception handling \ table */ \ int co_flags; /* CO_..., see below */ \ - int co_warmup; /* Warmup counter for quickening */ \ + short co_warmup; /* Warmup counter for quickening */ \ + short _co_linearray_entry_size; /* Size of each entry in _co_linearray */ \ \ /* The rest are not so impactful on performance. */ \ int co_argcount; /* #arguments, except *args */ \ @@ -89,6 +90,7 @@ typedef uint16_t _Py_CODEUNIT; PyObject *co_linetable; /* bytes object that holds location info */ \ PyObject *co_weakreflist; /* to support weakrefs to code objects */ \ void *_co_code; /* cached co_code object/attribute */ \ + char *_co_linearray; /* array of line offsets */ \ /* Scratch space for extra data relating to the code object. \ Type is a void* to keep the format private in codeobject.c to force \ people to go through the proper APIs. */ \ diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index c181543722f6e0..706f6838f03e13 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -463,6 +463,34 @@ adaptive_counter_backoff(uint16_t counter) { } +/* Line array cache for tracing */ + +extern int _PyCode_CreateLineArray(PyCodeObject *co); + +static inline int +_PyCode_InitLineArray(PyCodeObject *co) +{ + if (co->_co_linearray) { + return 0; + } + return _PyCode_CreateLineArray(co); +} + +static inline int +_PyCode_LineNumberFromArray(PyCodeObject *co, int index) +{ + assert(co->_co_linearray != NULL); + assert(index >= 0); + if (co->_co_linearray_entry_size == 2) { + return ((int16_t *)co->_co_linearray)[index]; + } + else { + assert(co->_co_linearray_entry_size == 4); + return ((int32_t *)co->_co_linearray)[index]; + } +} + + #ifdef __cplusplus } #endif diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-06-13-10-48-09.gh-issue-93516.yJSait.rst b/Misc/NEWS.d/next/Core and Builtins/2022-06-13-10-48-09.gh-issue-93516.yJSait.rst new file mode 100644 index 00000000000000..5c22c7a67b6e51 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-06-13-10-48-09.gh-issue-93516.yJSait.rst @@ -0,0 +1,2 @@ +Lazily create a table mapping bytecode offsets to line numbers to speed up +calculation of line numbers when tracing. diff --git a/Objects/codeobject.c b/Objects/codeobject.c index dc6dec1b2a549b..2ce75e473dfb72 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -337,6 +337,8 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) co->_co_code = NULL; co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE; + co->_co_linearray_entry_size = 0; + co->_co_linearray = NULL; memcpy(_PyCode_CODE(co), PyBytes_AS_STRING(con->code), PyBytes_GET_SIZE(con->code)); } @@ -695,12 +697,65 @@ PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno) lnotab_notes.txt for the details of the lnotab representation. */ + +int +_PyCode_CreateLineArray(PyCodeObject *co) +{ + assert(co->_co_linearray == NULL); + PyCodeAddressRange bounds; + int size; + int max_line = 0; + _PyCode_InitAddressRange(co, &bounds); + while (1) { + if (!_PyLineTable_NextAddressRange(&bounds)) { + break; + } + if (bounds.ar_line > max_line) { + max_line = bounds.ar_line; + } + } + if (max_line < (1 << 15)) { + size = 2; + } + else { + size = 4; + } + co->_co_linearray = PyMem_Malloc(Py_SIZE(co)*size); + if (co->_co_linearray == NULL) { + PyErr_NoMemory(); + return -1; + } + co->_co_linearray_entry_size = size; + _PyCode_InitAddressRange(co, &bounds); + while (1) { + if (!_PyLineTable_NextAddressRange(&bounds)) { + break; + } + int index = bounds.ar_start; + while (index < bounds.ar_end && index < Py_SIZE(co)) { + if (size == 2) { + assert(((int16_t)bounds.ar_line) == bounds.ar_line); + ((int16_t *)co->_co_linearray)[index] = bounds.ar_line; + } + else { + assert(size == 4); + ((int32_t *)co->_co_linearray)[index] = bounds.ar_line; + } + index++; + } + } + return 0; +} + int PyCode_Addr2Line(PyCodeObject *co, int addrq) { if (addrq < 0) { return co->co_firstlineno; } + if (co->_co_linearray) { + return _PyCode_LineNumberFromArray(co, addrq / sizeof(_Py_CODEUNIT)); + } assert(addrq >= 0 && addrq < _PyCode_NBYTES(co)); PyCodeAddressRange bounds; _PyCode_InitAddressRange(co, &bounds); diff --git a/Python/ceval.c b/Python/ceval.c index 0e8186347cd895..15eb9e102db40f 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -6815,9 +6815,10 @@ call_trace(Py_tracefunc func, PyObject *obj, tstate->tracing_what = what; PyThreadState_EnterTracing(tstate); assert(_PyInterpreterFrame_LASTI(frame) >= 0); - initialize_trace_info(&tstate->trace_info, frame); - int addr = _PyInterpreterFrame_LASTI(frame) * sizeof(_Py_CODEUNIT); - f->f_lineno = _PyCode_CheckLineNumber(addr, &tstate->trace_info.bounds); + if (_PyCode_InitLineArray(frame->f_code)) { + return -1; + } + f->f_lineno = _PyCode_LineNumberFromArray(frame->f_code, _PyInterpreterFrame_LASTI(frame)); result = func(obj, f, what, arg); f->f_lineno = 0; PyThreadState_LeaveTracing(tstate); @@ -6854,7 +6855,9 @@ maybe_call_line_trace(Py_tracefunc func, PyObject *obj, represents a jump backwards, update the frame's line number and then call the trace function if we're tracing source lines. */ - initialize_trace_info(&tstate->trace_info, frame); + if (_PyCode_InitLineArray(frame->f_code)) { + return -1; + } int entry_point = 0; _Py_CODEUNIT *code = _PyCode_CODE(frame->f_code); while (_PyOpcode_Deopt[_Py_OPCODE(code[entry_point])] != RESUME) { @@ -6865,10 +6868,9 @@ maybe_call_line_trace(Py_tracefunc func, PyObject *obj, lastline = -1; } else { - lastline = _PyCode_CheckLineNumber(instr_prev*sizeof(_Py_CODEUNIT), &tstate->trace_info.bounds); + lastline = _PyCode_LineNumberFromArray(frame->f_code, instr_prev); } - int addr = _PyInterpreterFrame_LASTI(frame) * sizeof(_Py_CODEUNIT); - int line = _PyCode_CheckLineNumber(addr, &tstate->trace_info.bounds); + int line = _PyCode_LineNumberFromArray(frame->f_code, _PyInterpreterFrame_LASTI(frame)); PyFrameObject *f = _PyFrame_GetFrameObject(frame); if (f == NULL) { return -1; diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index ac2076708a156f..45672a4d8db22c 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -250,6 +250,7 @@ def generate_code(self, name: str, code: types.CodeType) -> str: self.write(f".co_exceptiontable = {co_exceptiontable},") self.field(code, "co_flags") self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,") + self.write("._co_linearray_entry_size = 0,") self.field(code, "co_argcount") self.field(code, "co_posonlyargcount") self.field(code, "co_kwonlyargcount") @@ -266,6 +267,7 @@ def generate_code(self, name: str, code: types.CodeType) -> str: self.write(f".co_name = {co_name},") self.write(f".co_qualname = {co_qualname},") self.write(f".co_linetable = {co_linetable},") + self.write("._co_linearray = NULL,") self.write(f".co_code_adaptive = {co_code_adaptive},") name_as_code = f"(PyCodeObject *)&{name}" self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});") From 3e5d1fde8162c27d7be31f8a9ab317298b8c89d5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 17 Jun 2022 11:10:38 +0100 Subject: [PATCH 2/2] Refactor loops --- Include/internal/pycore_code.h | 1 + Objects/codeobject.c | 20 ++++++-------------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 706f6838f03e13..8615ff1d515f9a 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -481,6 +481,7 @@ _PyCode_LineNumberFromArray(PyCodeObject *co, int index) { assert(co->_co_linearray != NULL); assert(index >= 0); + assert(index < Py_SIZE(co)); if (co->_co_linearray_entry_size == 2) { return ((int16_t *)co->_co_linearray)[index]; } diff --git a/Objects/codeobject.c b/Objects/codeobject.c index c71d7d55f1fbe2..bc3ef8acee152c 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -703,7 +703,6 @@ PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno) lnotab_notes.txt for the details of the lnotab representation. */ - int _PyCode_CreateLineArray(PyCodeObject *co) { @@ -712,10 +711,7 @@ _PyCode_CreateLineArray(PyCodeObject *co) int size; int max_line = 0; _PyCode_InitAddressRange(co, &bounds); - while (1) { - if (!_PyLineTable_NextAddressRange(&bounds)) { - break; - } + while(_PyLineTable_NextAddressRange(&bounds)) { if (bounds.ar_line > max_line) { max_line = bounds.ar_line; } @@ -733,14 +729,11 @@ _PyCode_CreateLineArray(PyCodeObject *co) } co->_co_linearray_entry_size = size; _PyCode_InitAddressRange(co, &bounds); - while (1) { - if (!_PyLineTable_NextAddressRange(&bounds)) { - break; - } - int addr = bounds.ar_start; - while (addr < bounds.ar_end) { - assert(addr < (int)(Py_SIZE(co) * sizeof(_Py_CODEUNIT))); - int index = addr / sizeof(_Py_CODEUNIT); + while(_PyLineTable_NextAddressRange(&bounds)) { + int start = bounds.ar_start / sizeof(_Py_CODEUNIT); + int end = bounds.ar_end / sizeof(_Py_CODEUNIT); + for (int index = start; index < end; index++) { + assert(index < (int)Py_SIZE(co)); if (size == 2) { assert(((int16_t)bounds.ar_line) == bounds.ar_line); ((int16_t *)co->_co_linearray)[index] = bounds.ar_line; @@ -749,7 +742,6 @@ _PyCode_CreateLineArray(PyCodeObject *co) assert(size == 4); ((int32_t *)co->_co_linearray)[index] = bounds.ar_line; } - addr += sizeof(_Py_CODEUNIT); } } return 0;