Skip to content

Commit 3b0f1c5

Browse files
authored
bpo-46841: Use inline cache for BINARY_SUBSCR. (GH-31618)
1 parent e91b0a7 commit 3b0f1c5

File tree

11 files changed

+76
-39
lines changed

11 files changed

+76
-39
lines changed

Include/cpython/object.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,13 @@ struct _typeobject {
229229
vectorcallfunc tp_vectorcall;
230230
};
231231

232+
/* This struct is used by the specializer
233+
* It should be treated as an opaque blob
234+
* by code other than the specializer and interpreter. */
235+
struct _specialization_cache {
236+
PyObject *getitem;
237+
};
238+
232239
/* The *real* layout of a type object when allocated on the heap */
233240
typedef struct _heaptypeobject {
234241
/* Note: there's a dependency on the order of these members
@@ -247,6 +254,7 @@ typedef struct _heaptypeobject {
247254
struct _dictkeysobject *ht_cached_keys;
248255
PyObject *ht_module;
249256
char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec
257+
struct _specialization_cache _spec_cache; // For use by the specializer.
250258
/* here are optional user slots, followed by the members. */
251259
} PyHeapTypeObject;
252260

Include/internal/pycore_code.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,15 @@ typedef struct {
9292

9393
#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)
9494

95+
typedef struct {
96+
_Py_CODEUNIT counter;
97+
_Py_CODEUNIT type_version;
98+
_Py_CODEUNIT _t1;
99+
_Py_CODEUNIT func_version;
100+
} _PyBinarySubscrCache;
101+
102+
#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)
103+
95104
/* Maximum size of code to quicken, in code units. */
96105
#define MAX_SIZE_TO_QUICKEN 5000
97106

@@ -323,7 +332,7 @@ extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObjec
323332
extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
324333
extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name);
325334
extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
326-
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
335+
extern int _Py_Specialize_BinarySubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr); /* container first, then subscript */
327336
extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
328337
extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
329338
PyObject *kwnames, SpecializedCacheEntry *cache);

Include/opcode.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/importlib/_bootstrap_external.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ def _write_atomic(path, data, mode=0o666):
389389
# Python 3.11a5 3480 (New CALL opcodes, second iteration)
390390
# Python 3.11a5 3481 (Use inline cache for BINARY_OP)
391391
# Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL)
392-
# Python 3.11a5 3483 (Use inline caching for COMPARE_OP)
392+
# Python 3.11a5 3483 (Use inline caching for COMPARE_OP and BINARY_SUBSCR)
393393

394394
# Python 3.12 will start with magic number 3500
395395

Lib/opcode.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def jabs_op(name, op, entries=0):
6868

6969
def_op('UNARY_INVERT', 15)
7070

71-
def_op('BINARY_SUBSCR', 25)
71+
def_op('BINARY_SUBSCR', 25, 4)
7272

7373
def_op('GET_LEN', 30)
7474
def_op('MATCH_MAPPING', 31)

Lib/test/test_capi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ class C(): pass
335335
*_, count = line.split(b' ')
336336
count = int(count)
337337
self.assertLessEqual(count, i*5)
338-
self.assertGreaterEqual(count, i*5-1)
338+
self.assertGreaterEqual(count, i*5-2)
339339

340340
def test_mapping_keys_values_items(self):
341341
class Mapping1(dict):

Lib/test/test_sys.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1501,7 +1501,9 @@ def delx(self): del self.__x
15011501
'3P' # PyMappingMethods
15021502
'10P' # PySequenceMethods
15031503
'2P' # PyBufferProcs
1504-
'6P')
1504+
'6P'
1505+
'1P' # Specializer cache
1506+
)
15051507
class newstyleclass(object): pass
15061508
# Separate block for PyDictKeysObject with 8 keys and 5 entries
15071509
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Use inline cache for :opcode:`BINARY_SUBSCR`.

Programs/test_frozenmain.h

Lines changed: 12 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/ceval.c

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2102,25 +2102,24 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
21022102
SET_TOP(res);
21032103
if (res == NULL)
21042104
goto error;
2105+
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
21052106
DISPATCH();
21062107
}
21072108

21082109
TARGET(BINARY_SUBSCR_ADAPTIVE) {
2109-
SpecializedCacheEntry *cache = GET_CACHE();
2110-
if (cache->adaptive.counter == 0) {
2110+
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
2111+
if (cache->counter == 0) {
21112112
PyObject *sub = TOP();
21122113
PyObject *container = SECOND();
21132114
next_instr--;
2114-
if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) {
2115+
if (_Py_Specialize_BinarySubscr(container, sub, next_instr) < 0) {
21152116
goto error;
21162117
}
21172118
DISPATCH();
21182119
}
21192120
else {
21202121
STAT_INC(BINARY_SUBSCR, deferred);
2121-
cache->adaptive.counter--;
2122-
assert(cache->adaptive.original_oparg == 0);
2123-
/* No need to set oparg here; it isn't used by BINARY_SUBSCR */
2122+
cache->counter--;
21242123
JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
21252124
}
21262125
}
@@ -2146,6 +2145,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
21462145
Py_DECREF(sub);
21472146
SET_TOP(res);
21482147
Py_DECREF(list);
2148+
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
21492149
NOTRACE_DISPATCH();
21502150
}
21512151

@@ -2170,6 +2170,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
21702170
Py_DECREF(sub);
21712171
SET_TOP(res);
21722172
Py_DECREF(tuple);
2173+
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
21732174
NOTRACE_DISPATCH();
21742175
}
21752176

@@ -2188,18 +2189,22 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
21882189
Py_DECREF(sub);
21892190
SET_TOP(res);
21902191
Py_DECREF(dict);
2192+
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
21912193
DISPATCH();
21922194
}
21932195

21942196
TARGET(BINARY_SUBSCR_GETITEM) {
21952197
PyObject *sub = TOP();
21962198
PyObject *container = SECOND();
2197-
SpecializedCacheEntry *caches = GET_CACHE();
2198-
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
2199-
_PyObjectCache *cache1 = &caches[-1].obj;
2200-
PyFunctionObject *getitem = (PyFunctionObject *)cache1->obj;
2201-
DEOPT_IF(Py_TYPE(container)->tp_version_tag != cache0->version, BINARY_SUBSCR);
2202-
DEOPT_IF(getitem->func_version != cache0->index, BINARY_SUBSCR);
2199+
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
2200+
uint32_t type_version = read32(&cache->type_version);
2201+
PyTypeObject *tp = Py_TYPE(container);
2202+
DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR);
2203+
assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE);
2204+
PyObject *cached = ((PyHeapTypeObject *)tp)->_spec_cache.getitem;
2205+
assert(PyFunction_Check(cached));
2206+
PyFunctionObject *getitem = (PyFunctionObject *)cached;
2207+
DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR);
22032208
PyCodeObject *code = (PyCodeObject *)getitem->func_code;
22042209
size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE;
22052210
assert(code->co_argcount == 2);
@@ -2218,6 +2223,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
22182223
new_frame->localsplus[i] = NULL;
22192224
}
22202225
_PyFrame_SetStackPointer(frame, stack_pointer);
2226+
frame->f_lasti += INLINE_CACHE_ENTRIES_BINARY_SUBSCR;
22212227
new_frame->previous = frame;
22222228
frame = cframe.current_frame = new_frame;
22232229
CALL_STAT_INC(inlined_py_calls);
@@ -5605,7 +5611,7 @@ MISS_WITH_CACHE(PRECALL)
56055611
MISS_WITH_CACHE(CALL)
56065612
MISS_WITH_INLINE_CACHE(BINARY_OP)
56075613
MISS_WITH_INLINE_CACHE(COMPARE_OP)
5608-
MISS_WITH_CACHE(BINARY_SUBSCR)
5614+
MISS_WITH_INLINE_CACHE(BINARY_SUBSCR)
56095615
MISS_WITH_INLINE_CACHE(UNPACK_SEQUENCE)
56105616
MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)
56115617

Python/specialize.c

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ static uint8_t adaptive_opcodes[256] = {
6060
static uint8_t cache_requirements[256] = {
6161
[LOAD_ATTR] = 1, // _PyAdaptiveEntry
6262
[LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
63-
[BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
6463
[STORE_SUBSCR] = 0,
6564
[CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
6665
[PRECALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
@@ -385,6 +384,8 @@ optimize(SpecializedCacheOrInstruction *quickened, int len)
385384
if (adaptive_opcode) {
386385
if (_PyOpcode_InlineCacheEntries[opcode]) {
387386
instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, oparg);
387+
previous_opcode = -1;
388+
i += _PyOpcode_InlineCacheEntries[opcode];
388389
}
389390
else if (previous_opcode != EXTENDED_ARG) {
390391
int new_oparg = oparg_from_instruction_and_update_offset(
@@ -553,6 +554,7 @@ initial_counter_value(void) {
553554
#define SPEC_FAIL_SUBSCR_PY_SIMPLE 20
554555
#define SPEC_FAIL_SUBSCR_PY_OTHER 21
555556
#define SPEC_FAIL_SUBSCR_DICT_SUBCLASS_NO_OVERRIDE 22
557+
#define SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE 23
556558

557559
/* Binary op */
558560

@@ -1335,9 +1337,11 @@ function_kind(PyCodeObject *code) {
13351337

13361338
int
13371339
_Py_Specialize_BinarySubscr(
1338-
PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
1340+
PyObject *container, PyObject *sub, _Py_CODEUNIT *instr)
13391341
{
1340-
_PyAdaptiveEntry *cache0 = &cache->adaptive;
1342+
assert(_PyOpcode_InlineCacheEntries[BINARY_SUBSCR] ==
1343+
INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
1344+
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1);
13411345
PyTypeObject *container_type = Py_TYPE(container);
13421346
if (container_type == &PyList_Type) {
13431347
if (PyLong_CheckExact(sub)) {
@@ -1364,26 +1368,30 @@ _Py_Specialize_BinarySubscr(
13641368
PyTypeObject *cls = Py_TYPE(container);
13651369
PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__));
13661370
if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
1371+
if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
1372+
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE);
1373+
goto fail;
1374+
}
13671375
PyFunctionObject *func = (PyFunctionObject *)descriptor;
1368-
PyCodeObject *code = (PyCodeObject *)func->func_code;
1369-
int kind = function_kind(code);
1376+
PyCodeObject *fcode = (PyCodeObject *)func->func_code;
1377+
int kind = function_kind(fcode);
13701378
if (kind != SIMPLE_FUNCTION) {
13711379
SPECIALIZATION_FAIL(BINARY_SUBSCR, kind);
13721380
goto fail;
13731381
}
1374-
if (code->co_argcount != 2) {
1382+
if (fcode->co_argcount != 2) {
13751383
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
13761384
goto fail;
13771385
}
13781386
assert(cls->tp_version_tag != 0);
1379-
cache0->version = cls->tp_version_tag;
1387+
write32(&cache->type_version, cls->tp_version_tag);
13801388
int version = _PyFunction_GetVersionForCurrentState(func);
13811389
if (version == 0 || version != (uint16_t)version) {
13821390
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);
13831391
goto fail;
13841392
}
1385-
cache0->index = version;
1386-
cache[-1].obj.obj = descriptor;
1393+
cache->func_version = version;
1394+
((PyHeapTypeObject *)container_type)->_spec_cache.getitem = descriptor;
13871395
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr));
13881396
goto success;
13891397
}
@@ -1392,12 +1400,12 @@ _Py_Specialize_BinarySubscr(
13921400
fail:
13931401
STAT_INC(BINARY_SUBSCR, failure);
13941402
assert(!PyErr_Occurred());
1395-
cache_backoff(cache0);
1403+
cache->counter = ADAPTIVE_CACHE_BACKOFF;
13961404
return 0;
13971405
success:
13981406
STAT_INC(BINARY_SUBSCR, success);
13991407
assert(!PyErr_Occurred());
1400-
cache0->counter = initial_counter_value();
1408+
cache->counter = initial_counter_value();
14011409
return 0;
14021410
}
14031411

0 commit comments

Comments
 (0)