Skip to content

bpo-46841: Inline cache for BINARY_SUBSCR. #31618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 1, 2022
8 changes: 8 additions & 0 deletions Include/cpython/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,13 @@ struct _typeobject {
vectorcallfunc tp_vectorcall;
};

/* This struct is used by the specializer.
* It should be treated as an opaque blob
* by code other than the specializer and interpreter. */
struct _specialization_cache {
/* The Python-level __getitem__ function found by the specializer for this
* type; read back by the BINARY_SUBSCR_GETITEM instruction.
* NOTE(review): the specializer appears to store this pointer without
* taking a new reference -- confirm the lifetime is covered by the
* type-version check done before it is used. */
PyObject *getitem;
};

/* The *real* layout of a type object when allocated on the heap */
typedef struct _heaptypeobject {
/* Note: there's a dependency on the order of these members
Expand All @@ -247,6 +254,7 @@ typedef struct _heaptypeobject {
struct _dictkeysobject *ht_cached_keys;
PyObject *ht_module;
char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec
struct _specialization_cache _spec_cache; // For use by the specializer.
/* here are optional user slots, followed by the members. */
} PyHeapTypeObject;

Expand Down
11 changes: 10 additions & 1 deletion Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,15 @@ typedef struct {

#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)

/* Inline cache laid out in the code units that follow a BINARY_SUBSCR
* instruction. Every member is one _Py_CODEUNIT wide. */
typedef struct {
/* Adaptive counter: decremented on each deferred execution; the
* specializer is invoked when it reaches zero. */
_Py_CODEUNIT counter;
/* First code unit of the container type's version tag. The tag is
* accessed as a 32-bit value with read32()/write32(), so it also
* occupies the following member. */
_Py_CODEUNIT type_version;
/* Second code unit of the 32-bit type version; never accessed by name. */
_Py_CODEUNIT _t1;
/* Version of the cached __getitem__ function. The specializer only
* stores versions that fit in 16 bits. */
_Py_CODEUNIT func_version;
} _PyBinarySubscrCache;

#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)

/* Maximum size of code to quicken, in code units. */
#define MAX_SIZE_TO_QUICKEN 5000

Expand Down Expand Up @@ -323,7 +332,7 @@ extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObjec
extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name);
extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
/* Attempt to specialize the BINARY_SUBSCR at `instr` for `container[sub]`.
 * The container comes first and the subscript second, matching both the
 * definition and the call in the BINARY_SUBSCR_ADAPTIVE instruction. */
extern int _Py_Specialize_BinarySubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
PyObject *kwnames, SpecializedCacheEntry *cache);
Expand Down
1 change: 1 addition & 0 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.11a5 3480 (New CALL opcodes, second iteration)
# Python 3.11a5 3481 (Use inline cache for BINARY_OP)
# Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL)
# Python 3.11a5 3483 (Use inline caching for COMPARE_OP)
# Python 3.11a5 3483 (Use inline caching for COMPARE_OP and BINARY_SUBSCR)

# Python 3.12 will start with magic number 3500

Expand Down
2 changes: 1 addition & 1 deletion Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def jabs_op(name, op, entries=0):

def_op('UNARY_INVERT', 15)

def_op('BINARY_SUBSCR', 25)
def_op('BINARY_SUBSCR', 25, 4)

def_op('GET_LEN', 30)
def_op('MATCH_MAPPING', 31)
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_capi.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ class C(): pass
*_, count = line.split(b' ')
count = int(count)
self.assertLessEqual(count, i*5)
self.assertGreaterEqual(count, i*5-1)
self.assertGreaterEqual(count, i*5-2)

def test_mapping_keys_values_items(self):
class Mapping1(dict):
Expand Down
4 changes: 3 additions & 1 deletion Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1501,7 +1501,9 @@ def delx(self): del self.__x
'3P' # PyMappingMethods
'10P' # PySequenceMethods
'2P' # PyBufferProcs
'6P')
'6P'
'1P' # Specializer cache
)
class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Use inline cache for :opcode:`BINARY_SUBSCR`.
22 changes: 12 additions & 10 deletions Programs/test_frozenmain.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 19 additions & 13 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -2102,25 +2102,24 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
SET_TOP(res);
if (res == NULL)
goto error;
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
DISPATCH();
}

TARGET(BINARY_SUBSCR_ADAPTIVE) {
/* Adaptive form of BINARY_SUBSCR: when the cache counter has reached
* zero, ask the specializer to rewrite this instruction; otherwise
* count down and execute the generic BINARY_SUBSCR. */
SpecializedCacheEntry *cache = GET_CACHE();
if (cache->adaptive.counter == 0) {
/* The inline cache occupies the code units directly after this
* instruction, which is where next_instr points on entry. */
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
if (cache->counter == 0) {
PyObject *sub = TOP();
PyObject *container = SECOND();
/* Step back so next_instr addresses this instruction itself; the
* specializer rewrites *instr and finds the cache at instr + 1. */
next_instr--;
if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) {
if (_Py_Specialize_BinarySubscr(container, sub, next_instr) < 0) {
goto error;
}
/* NOTE(review): presumably DISPATCH() now re-executes the (possibly
* rewritten) instruction at next_instr -- confirm against the macro. */
DISPATCH();
}
else {
STAT_INC(BINARY_SUBSCR, deferred);
cache->adaptive.counter--;
assert(cache->adaptive.original_oparg == 0);
/* No need to set oparg here; it isn't used by BINARY_SUBSCR */
cache->counter--;
JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
}
}
Expand All @@ -2146,6 +2145,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
Py_DECREF(sub);
SET_TOP(res);
Py_DECREF(list);
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
NOTRACE_DISPATCH();
}

Expand All @@ -2170,6 +2170,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
Py_DECREF(sub);
SET_TOP(res);
Py_DECREF(tuple);
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
NOTRACE_DISPATCH();
}

Expand All @@ -2188,18 +2189,22 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
Py_DECREF(sub);
SET_TOP(res);
Py_DECREF(dict);
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
DISPATCH();
}

TARGET(BINARY_SUBSCR_GETITEM) {
PyObject *sub = TOP();
PyObject *container = SECOND();
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
_PyObjectCache *cache1 = &caches[-1].obj;
PyFunctionObject *getitem = (PyFunctionObject *)cache1->obj;
DEOPT_IF(Py_TYPE(container)->tp_version_tag != cache0->version, BINARY_SUBSCR);
DEOPT_IF(getitem->func_version != cache0->index, BINARY_SUBSCR);
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
uint32_t type_version = read32(&cache->type_version);
PyTypeObject *tp = Py_TYPE(container);
DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR);
assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE);
PyObject *cached = ((PyHeapTypeObject *)tp)->_spec_cache.getitem;
assert(PyFunction_Check(cached));
PyFunctionObject *getitem = (PyFunctionObject *)cached;
DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR);
PyCodeObject *code = (PyCodeObject *)getitem->func_code;
size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE;
assert(code->co_argcount == 2);
Expand All @@ -2218,6 +2223,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
new_frame->localsplus[i] = NULL;
}
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->f_lasti += INLINE_CACHE_ENTRIES_BINARY_SUBSCR;
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
CALL_STAT_INC(inlined_py_calls);
Expand Down Expand Up @@ -5605,7 +5611,7 @@ MISS_WITH_CACHE(PRECALL)
MISS_WITH_CACHE(CALL)
MISS_WITH_INLINE_CACHE(BINARY_OP)
MISS_WITH_INLINE_CACHE(COMPARE_OP)
MISS_WITH_CACHE(BINARY_SUBSCR)
MISS_WITH_INLINE_CACHE(BINARY_SUBSCR)
MISS_WITH_INLINE_CACHE(UNPACK_SEQUENCE)
MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)

Expand Down
30 changes: 19 additions & 11 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ static uint8_t adaptive_opcodes[256] = {
static uint8_t cache_requirements[256] = {
[LOAD_ATTR] = 1, // _PyAdaptiveEntry
[LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
[BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
[STORE_SUBSCR] = 0,
[CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
[PRECALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
Expand Down Expand Up @@ -385,6 +384,8 @@ optimize(SpecializedCacheOrInstruction *quickened, int len)
if (adaptive_opcode) {
if (_PyOpcode_InlineCacheEntries[opcode]) {
instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, oparg);
previous_opcode = -1;
i += _PyOpcode_InlineCacheEntries[opcode];
}
else if (previous_opcode != EXTENDED_ARG) {
int new_oparg = oparg_from_instruction_and_update_offset(
Expand Down Expand Up @@ -553,6 +554,7 @@ initial_counter_value(void) {
#define SPEC_FAIL_SUBSCR_PY_SIMPLE 20
#define SPEC_FAIL_SUBSCR_PY_OTHER 21
#define SPEC_FAIL_SUBSCR_DICT_SUBCLASS_NO_OVERRIDE 22
#define SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE 23

/* Binary op */

Expand Down Expand Up @@ -1335,9 +1337,11 @@ function_kind(PyCodeObject *code) {

int
_Py_Specialize_BinarySubscr(
PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
PyObject *container, PyObject *sub, _Py_CODEUNIT *instr)
{
_PyAdaptiveEntry *cache0 = &cache->adaptive;
assert(_PyOpcode_InlineCacheEntries[BINARY_SUBSCR] ==
INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1);
PyTypeObject *container_type = Py_TYPE(container);
if (container_type == &PyList_Type) {
if (PyLong_CheckExact(sub)) {
Expand All @@ -1364,26 +1368,30 @@ _Py_Specialize_BinarySubscr(
PyTypeObject *cls = Py_TYPE(container);
PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__));
if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE);
goto fail;
}
PyFunctionObject *func = (PyFunctionObject *)descriptor;
PyCodeObject *code = (PyCodeObject *)func->func_code;
int kind = function_kind(code);
PyCodeObject *fcode = (PyCodeObject *)func->func_code;
int kind = function_kind(fcode);
if (kind != SIMPLE_FUNCTION) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, kind);
goto fail;
}
if (code->co_argcount != 2) {
if (fcode->co_argcount != 2) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
goto fail;
}
assert(cls->tp_version_tag != 0);
cache0->version = cls->tp_version_tag;
write32(&cache->type_version, cls->tp_version_tag);
int version = _PyFunction_GetVersionForCurrentState(func);
if (version == 0 || version != (uint16_t)version) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);
goto fail;
}
cache0->index = version;
cache[-1].obj.obj = descriptor;
cache->func_version = version;
((PyHeapTypeObject *)container_type)->_spec_cache.getitem = descriptor;
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr));
goto success;
}
Expand All @@ -1392,12 +1400,12 @@ _Py_Specialize_BinarySubscr(
fail:
STAT_INC(BINARY_SUBSCR, failure);
assert(!PyErr_Occurred());
cache_backoff(cache0);
cache->counter = ADAPTIVE_CACHE_BACKOFF;
return 0;
success:
STAT_INC(BINARY_SUBSCR, success);
assert(!PyErr_Occurred());
cache0->counter = initial_counter_value();
cache->counter = initial_counter_value();
return 0;
}

Expand Down