Skip to content

IGNORE: Project uops forever #107945

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 26 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
56133bb
Split `CALL_PY_EXACT_ARGS` into uops
gvanrossum Aug 5, 2023
907ff95
Fix merge so it works again (I think)
gvanrossum Aug 9, 2023
2c6be6d
Split into finer-grained uops
gvanrossum Aug 9, 2023
6d78ff2
Fix type error in stacking.py
gvanrossum Aug 10, 2023
0d8e66c
Add test
gvanrossum Aug 10, 2023
b75f30e
Add comment explaining _PUSH_FRAME's unused output effect
gvanrossum Aug 10, 2023
61c2822
Make PUSH_FRAME special case a little less myterious
gvanrossum Aug 10, 2023
f73ea90
Rename Instruction.write to write_case_body
gvanrossum Aug 10, 2023
12910fc
Move next_instr update to a more logical place
gvanrossum Aug 10, 2023
2fafa2c
Don't recompute macro cache offset
gvanrossum Aug 10, 2023
2717b07
Fold and refactor long line in stacking.py
gvanrossum Aug 10, 2023
e487908
Fold long lines in generate_cases.py
gvanrossum Aug 10, 2023
1d549af
Don't emit static assert to executor cases
gvanrossum Aug 10, 2023
f40fb1f
Factor away write_case_body (formerly Instruction.write)
gvanrossum Aug 10, 2023
4f6f8f8
Fold long lines
gvanrossum Aug 11, 2023
6facc8d
Make less of a special case of _PUSH_FRAME
gvanrossum Aug 11, 2023
94630d4
Stop special-casing _PUSH_FRAME altogether
gvanrossum Aug 11, 2023
675f317
Add function-by-version cache
gvanrossum Aug 8, 2023
bb514e6
Double max trace length to 64
gvanrossum Aug 8, 2023
61958fc
Trace into function calls
gvanrossum Aug 8, 2023
61e636b
Make RESUME a viable uop
gvanrossum Aug 12, 2023
d0c6f86
Clear func_version_cache in interpreter_clear()
gvanrossum Aug 12, 2023
bfa5566
Add a small essay on function and code versions
gvanrossum Aug 12, 2023
597b12e
Move function cache clearing earlier in finalization
gvanrossum Aug 12, 2023
d7b7e01
Cache borrowed references, cleared in func_dealloc
gvanrossum Aug 13, 2023
da4c7ad
DO NOT MERGE: Always use -Xuops
gvanrossum Aug 9, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Include/internal/pycore_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,22 @@ extern PyObject* _PyFunction_Vectorcall(

#define FUNC_MAX_WATCHERS 8

#define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */
struct _py_func_state {
uint32_t next_version;
// Borrowed references to function objects whose
// func_version % FUNC_VERSION_CACHE_SIZE
// once was equal to the index in the table.
// They are cleared when the function is deallocated.
PyFunctionObject *func_version_cache[FUNC_VERSION_CACHE_SIZE];
};

extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr);

extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func);
extern void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version);
PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version);

extern PyObject *_Py_set_function_type_params(
PyThreadState* unused, PyObject *func, PyObject *type_params);

Expand Down
20 changes: 17 additions & 3 deletions Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Include/internal/pycore_uops.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

#define _Py_UOP_MAX_TRACE_LENGTH 32
#define _Py_UOP_MAX_TRACE_LENGTH 64

typedef struct {
uint32_t opcode;
Expand Down
18 changes: 18 additions & 0 deletions Lib/test/test_capi/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2618,6 +2618,24 @@ def testfunc(it):
with self.assertRaises(StopIteration):
next(it)

def test_call_py_exact_args(self):
def testfunc(n):
def dummy(x):
return x+1
for i in range(n):
dummy(i)

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(10)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _, _ in ex}
self.assertIn("_PUSH_FRAME", uops)
self.assertIn("_BINARY_OP_ADD_INT", uops)



if __name__ == "__main__":
unittest.main()
79 changes: 77 additions & 2 deletions Objects/funcobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,73 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname
return NULL;
}

uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
/*
Function versions
-----------------

Function versions are used to detect when a function object has been
updated, invalidating inline cache data used by the `CALL` bytecode
(notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations).

They are also used by the Tier 2 superblock creation code to find
the function being called (and from there the code object).

How does a function's `func_version` field get initialized?

- `PyFunction_New` and friends initialize it to 0.
- The `MAKE_FUNCTION` instruction sets it from the code's `co_version`.
- It is reset to 0 when various attributes like `__code__` are set.
- A new version is allocated by `_PyFunction_GetVersionForCurrentState`
when the specializer needs a version and the version is 0.

The latter allocates versions using a counter in the interpreter state;
when the counter wraps around to 0, no more versions are allocated.
There is one other special case: functions with a non-standard
`vectorcall` field are not given a version.

When the function version is 0, the `CALL` bytecode is not specialized.

Code object versions
--------------------

So where to code objects get their `co_version`? There is a single
static global counter, `_Py_next_func_version`. This is initialized in
the generated (!) file `Python/deepfreeze/deepfreeze.c`, to 1 plus the
number of deep-frozen function objects in that file.
(In `_bootstrap_python.c` and `freeze_module.c` it is initialized to 1.)

Code objects get a new `co_version` allocated from this counter upon
creation. Since code objects are nominally immutable, `co_version` can
not be invalidated. The only way it can be 0 is when 2**32 or more
code objects have been created during the process's lifetime.
(The counter isn't reset by `fork()`, extending the lifetime.)
*/

void
_PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)
{
func->func_version = version;
if (version != 0) {
PyInterpreterState *interp = _PyInterpreterState_GET();
interp->func_state.func_version_cache[
version % FUNC_VERSION_CACHE_SIZE] = func;
}
}

PyFunctionObject *
_PyFunction_LookupByVersion(uint32_t version)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
PyFunctionObject *func = interp->func_state.func_version_cache[
version % FUNC_VERSION_CACHE_SIZE];
if (func != NULL && func->func_version == version) {
return (PyFunctionObject *)Py_NewRef(func);
}
return NULL;
}

uint32_t
_PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
{
if (func->func_version != 0) {
return func->func_version;
Expand All @@ -236,7 +302,7 @@ uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
return 0;
}
uint32_t v = interp->func_state.next_version++;
func->func_version = v;
_PyFunction_SetVersion(func, v);
return v;
}

Expand Down Expand Up @@ -851,6 +917,15 @@ func_dealloc(PyFunctionObject *op)
if (op->func_weakreflist != NULL) {
PyObject_ClearWeakRefs((PyObject *) op);
}
if (op->func_version != 0) {
PyInterpreterState *interp = _PyInterpreterState_GET();
PyFunctionObject **slot =
interp->func_state.func_version_cache
+ (op->func_version % FUNC_VERSION_CACHE_SIZE);
if (*slot == op) {
*slot = NULL;
}
}
(void)func_clear(op);
// These aren't cleared by func_clear().
Py_DECREF(op->func_code);
Expand Down
78 changes: 60 additions & 18 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ dummy_func(
}

inst(RESUME, (--)) {
#if TIER_ONE
assert(tstate->cframe == &cframe);
assert(frame == cframe.current_frame);
/* Possibly combine this with eval breaker */
Expand All @@ -143,7 +144,9 @@ dummy_func(
ERROR_IF(err, error);
next_instr--;
}
else if (oparg < 2) {
else
#endif
if (oparg < 2) {
CHECK_EVAL_BREAKER();
}
}
Expand Down Expand Up @@ -957,13 +960,13 @@ dummy_func(
{
PyGenObject *gen = (PyGenObject *)receiver;
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
frame->return_offset = oparg;
STACK_SHRINK(1);
_PyFrame_StackPush(gen_frame, v);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
SKIP_OVER(INLINE_CACHE_ENTRIES_SEND);
frame->return_offset = oparg;
DISPATCH_INLINED(gen_frame);
}
if (Py_IsNone(v) && PyIter_Check(receiver)) {
Expand Down Expand Up @@ -996,13 +999,13 @@ dummy_func(
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND);
STAT_INC(SEND, hit);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
frame->return_offset = oparg;
STACK_SHRINK(1);
_PyFrame_StackPush(gen_frame, v);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
SKIP_OVER(INLINE_CACHE_ENTRIES_SEND);
frame->return_offset = oparg;
DISPATCH_INLINED(gen_frame);
}

Expand Down Expand Up @@ -2586,14 +2589,14 @@ dummy_func(
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
STAT_INC(FOR_ITER, hit);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
frame->return_offset = oparg;
_PyFrame_StackPush(gen_frame, Py_None);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
assert(next_instr[oparg].op.code == END_FOR ||
next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
frame->return_offset = oparg;
DISPATCH_INLINED(gen_frame);
}

Expand Down Expand Up @@ -2944,32 +2947,70 @@ dummy_func(
GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS);
}

inst(CALL_PY_EXACT_ARGS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) {
ASSERT_KWNAMES_IS_NULL();
op(_CHECK_PEP_523, (--)) {
DEOPT_IF(tstate->interp->eval_frame, CALL);
int argcount = oparg;
if (self_or_null != NULL) {
args--;
argcount++;
}
}

op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
ASSERT_KWNAMES_IS_NULL();
DEOPT_IF(!PyFunction_Check(callable), CALL);
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != func_version, CALL);
PyCodeObject *code = (PyCodeObject *)func->func_code;
DEOPT_IF(code->co_argcount != argcount, CALL);
DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL);
}

op(_CHECK_STACK_SPACE, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) {
PyFunctionObject *func = (PyFunctionObject *)callable;
PyCodeObject *code = (PyCodeObject *)func->func_code;
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
}

op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) {
int argcount = oparg;
if (self_or_null != NULL) {
args--;
argcount++;
}
STAT_INC(CALL, hit);
_PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
PyFunctionObject *func = (PyFunctionObject *)callable;
new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = args[i];
}
// Manipulate stack directly since we leave using DISPATCH_INLINED().
STACK_SHRINK(oparg + 2);
SKIP_OVER(INLINE_CACHE_ENTRIES_CALL);
}

// The 'unused' output effect represents the return value
// (which will be pushed when the frame returns).
// It is needed so CALL_PY_EXACT_ARGS matches its family.
op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused)) {
// Write it out explicitly because it's subtly different.
// Eventually this should be the only occurrence of this code.
frame->return_offset = 0;
DISPATCH_INLINED(new_frame);
assert(tstate->interp->eval_frame == NULL);
SAVE_FRAME_STATE(); // Signals to the code generator
new_frame->previous = frame;
CALL_STAT_INC(inlined_py_calls);
#if TIER_ONE
frame = cframe.current_frame = new_frame;
goto start_frame;
#endif
#if TIER_TWO
frame = tstate->cframe->current_frame = new_frame;
stack_pointer = _PyFrame_GetStackPointer(frame);
ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
#endif
}

macro(CALL_PY_EXACT_ARGS) =
unused/1 + // Skip over the counter
_CHECK_PEP_523 +
_CHECK_FUNCTION_EXACT_ARGS +
_CHECK_STACK_SPACE +
_INIT_CALL_PY_EXACT_ARGS +
SAVE_IP + // Tier 2 only; special-cased oparg
_PUSH_FRAME;

inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) {
ASSERT_KWNAMES_IS_NULL();
DEOPT_IF(tstate->interp->eval_frame, CALL);
Expand Down Expand Up @@ -3504,7 +3545,8 @@ dummy_func(
goto error;
}

func_obj->func_version = ((PyCodeObject *)codeobj)->co_version;
_PyFunction_SetVersion(
func_obj, ((PyCodeObject *)codeobj)->co_version);
func = (PyObject *)func_obj;
}

Expand Down
1 change: 1 addition & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
#endif
{

#define TIER_ONE 1
#include "generated_cases.c.h"

/* INSTRUMENTED_LINE has to be here, rather than in bytecodes.c,
Expand Down
Loading