Skip to content

Commit 25db2b3

Browse files
authored
bpo-46675: Allow object value arrays and split key dictionaries larger than 16 (GH-31191)
1 parent 328fe3f commit 25db2b3

File tree

6 files changed

+91
-51
lines changed

6 files changed

+91
-51
lines changed

Include/internal/pycore_dict.h

+22-4
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,17 @@ struct _dictkeysobject {
9999
see the DK_ENTRIES() macro */
100100
};
101101

102-
/* This must be no more than 16, for the order vector to fit in 64 bits */
103-
#define SHARED_KEYS_MAX_SIZE 16
104-
102+
/* This must be no more than 250, for the prefix size to fit in one byte. */
103+
#define SHARED_KEYS_MAX_SIZE 30
104+
#define NEXT_LOG2_SHARED_KEYS_MAX_SIZE 6
105+
106+
/* Layout of dict values:
107+
*
108+
* The PyObject *values are preceded by an array of bytes holding
109+
* the insertion order and size.
110+
* [-1] = prefix size. [-2] = used size. [-3], [-4], ... [-2-n] = insertion order (oldest first, n = used size).
111+
*/
105112
struct _dictvalues {
106-
uint64_t mv_order;
107113
PyObject *values[1];
108114
};
109115

@@ -131,6 +137,18 @@ extern uint64_t _pydict_global_version;
131137

132138
PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values);
133139

140+
static inline void
141+
_PyDictValues_AddToInsertionOrder(PyDictValues *values, Py_ssize_t ix)
142+
{
143+
assert(ix < SHARED_KEYS_MAX_SIZE);
144+
uint8_t *size_ptr = ((uint8_t *)values)-2;
145+
int size = *size_ptr;
146+
assert(size+2 < ((uint8_t *)values)[-1]);
147+
size++;
148+
size_ptr[-size] = (uint8_t)ix;
149+
*size_ptr = size;
150+
}
151+
134152
#ifdef __cplusplus
135153
}
136154
#endif

Lib/test/test_descr.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5505,7 +5505,7 @@ class B(A):
55055505
pass
55065506

55075507
#Shrink keys by repeatedly creating instances
5508-
[(A(), B()) for _ in range(20)]
5508+
[(A(), B()) for _ in range(30)]
55095509

55105510
a, b = A(), B()
55115511
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))

Lib/test/test_sys.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1504,15 +1504,16 @@ def delx(self): del self.__x
15041504
'6P')
15051505
class newstyleclass(object): pass
15061506
# Separate block for PyDictKeysObject with 64 keys and 42 entries
1507-
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 32 + 21*calcsize("n2P"))
1507+
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
15081508
# dict with shared keys
1509-
check(newstyleclass().__dict__, size('nQ2P') + 15*self.P)
1509+
[newstyleclass() for _ in range(100)]
1510+
check(newstyleclass().__dict__, size('nQ2P') + self.P)
15101511
o = newstyleclass()
15111512
o.a = o.b = o.c = o.d = o.e = o.f = o.g = o.h = 1
15121513
# Separate block for PyDictKeysObject with 64 keys and 42 entries
1513-
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 32 + 21*calcsize("n2P"))
1514+
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
15141515
# dict with shared keys
1515-
check(newstyleclass().__dict__, size('nQ2P') + 13*self.P)
1516+
check(newstyleclass().__dict__, size('nQ2P') + self.P)
15161517
# unicode
15171518
# each tuple contains a string and its expected character size
15181519
# don't put any static strings here, as they may contain
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Allow more than 16 items in a split dict before it is combined. The limit is
2+
now 30 (``SHARED_KEYS_MAX_SIZE``).

Objects/dictobject.c

+59-39
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,14 @@ static PyDictKeysObject empty_keys_struct = {
453453
};
454454

455455

456-
static PyDictValues empty_values_struct = { 0, { NULL }};
457-
#define empty_values (&empty_values_struct)
456+
struct {
457+
uint8_t prefix[sizeof(PyObject *)];
458+
PyDictValues values;
459+
} empty_values_struct = {
460+
{ [sizeof(PyObject *)-1] = sizeof(PyObject *) },
461+
{{NULL}}
462+
};
463+
#define empty_values (&empty_values_struct.values)
458464

459465
#define Py_EMPTY_KEYS &empty_keys_struct
460466

@@ -470,9 +476,9 @@ static PyDictValues empty_values_struct = { 0, { NULL }};
470476
static inline int
471477
get_index_from_order(PyDictObject *mp, Py_ssize_t i)
472478
{
473-
assert(mp->ma_used <= 16);
474-
int shift = (int)(mp->ma_used-1-i)*4;
475-
return (int)(mp->ma_values->mv_order >> shift) & 15;
479+
assert(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
480+
assert(i < (((char *)mp->ma_values)[-2]));
481+
return ((char *)mp->ma_values)[-3-i];
476482
}
477483

478484
int
@@ -636,11 +642,25 @@ free_keys_object(PyDictKeysObject *keys)
636642
static inline PyDictValues*
637643
new_values(Py_ssize_t size)
638644
{
639-
Py_ssize_t n = sizeof(PyDictValues) + sizeof(PyObject *) * (size-1);
640-
return (PyDictValues*)PyMem_Malloc(n);
645+
assert(size > 0);
646+
size_t prefix_size = _Py_SIZE_ROUND_UP(size+2, sizeof(PyObject *));
647+
assert(prefix_size < 256);
648+
size_t n = prefix_size + size * sizeof(PyObject *);
649+
uint8_t *mem = PyMem_Malloc(n);
650+
if (mem == NULL) {
651+
return NULL;
652+
}
653+
assert(prefix_size % sizeof(PyObject *) == 0);
654+
mem[prefix_size-1] = (uint8_t)prefix_size;
655+
return (PyDictValues*)(mem + prefix_size);
641656
}
642657

643-
#define free_values(values) PyMem_Free(values)
658+
static inline void
659+
free_values(PyDictValues *values)
660+
{
661+
int prefix_size = ((uint8_t *)values)[-1];
662+
PyMem_Free(((char *)values)-prefix_size);
663+
}
644664

645665
/* Consumes a reference to the keys object */
646666
static PyObject *
@@ -699,7 +719,7 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
699719
dictkeys_decref(keys);
700720
return PyErr_NoMemory();
701721
}
702-
values->mv_order = 0;
722+
((char *)values)[-2] = 0;
703723
for (i = 0; i < size; i++) {
704724
values->values[i] = NULL;
705725
}
@@ -1017,7 +1037,7 @@ insertion_resize(PyDictObject *mp)
10171037
return dictresize(mp, calculate_log2_keysize(GROWTH_RATE(mp)));
10181038
}
10191039

1020-
static int
1040+
static Py_ssize_t
10211041
insert_into_dictkeys(PyDictKeysObject *keys, PyObject *name)
10221042
{
10231043
assert(PyUnicode_CheckExact(name));
@@ -1048,7 +1068,7 @@ insert_into_dictkeys(PyDictKeysObject *keys, PyObject *name)
10481068
keys->dk_nentries++;
10491069
}
10501070
assert (ix < SHARED_KEYS_MAX_SIZE);
1051-
return (int)ix;
1071+
return ix;
10521072
}
10531073

10541074
/*
@@ -1093,9 +1113,7 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
10931113
ep->me_hash = hash;
10941114
if (mp->ma_values) {
10951115
Py_ssize_t index = mp->ma_keys->dk_nentries;
1096-
assert(index < SHARED_KEYS_MAX_SIZE);
1097-
assert((mp->ma_values->mv_order >> 60) == 0);
1098-
mp->ma_values->mv_order = ((mp->ma_values->mv_order)<<4) | index;
1116+
_PyDictValues_AddToInsertionOrder(mp->ma_values, index);
10991117
assert (mp->ma_values->values[index] == NULL);
11001118
mp->ma_values->values[index] = value;
11011119
}
@@ -1115,7 +1133,7 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
11151133
if (_PyDict_HasSplitTable(mp)) {
11161134
mp->ma_values->values[ix] = value;
11171135
if (old_value == NULL) {
1118-
mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | ix;
1136+
_PyDictValues_AddToInsertionOrder(mp->ma_values, ix);
11191137
mp->ma_used++;
11201138
}
11211139
}
@@ -1598,19 +1616,20 @@ _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value,
15981616
return insertdict(mp, key, hash, value);
15991617
}
16001618

1601-
static uint64_t
1602-
delete_index_from_order(uint64_t order, Py_ssize_t ix)
1603-
{ /* Update order */
1604-
for (int i = 0;; i+= 4) {
1605-
assert (i < 64);
1606-
if (((order >> i) & 15) == (uint64_t)ix) {
1607-
/* Remove 4 bits at ith position */
1608-
uint64_t high = ((order>>i)>>4)<<i;
1609-
uint64_t low = order & ((((uint64_t)1)<<i)-1);
1610-
return high | low;
1611-
}
1619+
static void
1620+
delete_index_from_values(PyDictValues *values, Py_ssize_t ix)
1621+
{
1622+
uint8_t *size_ptr = ((uint8_t *)values)-2;
1623+
int size = *size_ptr;
1624+
int i;
1625+
for (i = 1; size_ptr[-i] != ix; i++) {
1626+
assert(i <= size);
16121627
}
1613-
Py_UNREACHABLE();
1628+
assert(i <= size);
1629+
for (; i < size; i++) {
1630+
size_ptr[-i] = size_ptr[-i-1];
1631+
}
1632+
*size_ptr = size -1;
16141633
}
16151634

16161635
static int
@@ -1631,8 +1650,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
16311650
mp->ma_values->values[ix] = NULL;
16321651
assert(ix < SHARED_KEYS_MAX_SIZE);
16331652
/* Update order */
1634-
mp->ma_values->mv_order =
1635-
delete_index_from_order(mp->ma_values->mv_order, ix);
1653+
delete_index_from_values(mp->ma_values, ix);
16361654
ASSERT_CONSISTENT(mp);
16371655
}
16381656
else {
@@ -2729,7 +2747,8 @@ PyDict_Copy(PyObject *o)
27292747
free_values(newvalues);
27302748
return NULL;
27312749
}
2732-
newvalues->mv_order = mp->ma_values->mv_order;
2750+
size_t prefix_size = ((uint8_t *)newvalues)[-1];
2751+
memcpy(((char *)newvalues)-prefix_size, ((char *)mp->ma_values)-prefix_size, prefix_size-1);
27332752
split_copy->ma_values = newvalues;
27342753
split_copy->ma_keys = mp->ma_keys;
27352754
split_copy->ma_used = mp->ma_used;
@@ -3031,11 +3050,11 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
30313050
ep->me_key = key;
30323051
ep->me_hash = hash;
30333052
if (_PyDict_HasSplitTable(mp)) {
3034-
int index = (int)mp->ma_keys->dk_nentries;
3053+
Py_ssize_t index = (int)mp->ma_keys->dk_nentries;
30353054
assert(index < SHARED_KEYS_MAX_SIZE);
30363055
assert(mp->ma_values->values[index] == NULL);
30373056
mp->ma_values->values[index] = value;
3038-
mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | index;
3057+
_PyDictValues_AddToInsertionOrder(mp->ma_values, index);
30393058
}
30403059
else {
30413060
ep->me_value = value;
@@ -3053,7 +3072,7 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
30533072
Py_INCREF(value);
30543073
MAINTAIN_TRACKING(mp, key, value);
30553074
mp->ma_values->values[ix] = value;
3056-
mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | ix;
3075+
_PyDictValues_AddToInsertionOrder(mp->ma_values, ix);
30573076
mp->ma_used++;
30583077
mp->ma_version_tag = DICT_NEXT_VERSION();
30593078
}
@@ -4941,7 +4960,7 @@ dictvalues_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored))
49414960
PyDictKeysObject *
49424961
_PyDict_NewKeysForClass(void)
49434962
{
4944-
PyDictKeysObject *keys = new_keys_object(5); /* log2(32) */
4963+
PyDictKeysObject *keys = new_keys_object(NEXT_LOG2_SHARED_KEYS_MAX_SIZE);
49454964
if (keys == NULL) {
49464965
PyErr_Clear();
49474966
}
@@ -4974,7 +4993,8 @@ init_inline_values(PyObject *obj, PyTypeObject *tp)
49744993
PyErr_NoMemory();
49754994
return -1;
49764995
}
4977-
values->mv_order = 0;
4996+
assert(((uint8_t *)values)[-1] >= size+2);
4997+
((uint8_t *)values)[-2] = 0;
49784998
for (int i = 0; i < size; i++) {
49794999
values->values[i] = NULL;
49805000
}
@@ -5047,14 +5067,14 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values,
50475067
assert(keys != NULL);
50485068
assert(values != NULL);
50495069
assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
5050-
int ix = insert_into_dictkeys(keys, name);
5070+
Py_ssize_t ix = insert_into_dictkeys(keys, name);
50515071
if (ix == DKIX_EMPTY) {
50525072
if (value == NULL) {
50535073
PyErr_SetObject(PyExc_AttributeError, name);
50545074
return -1;
50555075
}
50565076
#ifdef Py_STATS
5057-
if (shared_keys_usable_size(keys) > 14) {
5077+
if (shared_keys_usable_size(keys) == SHARED_KEYS_MAX_SIZE) {
50585078
OBJECT_STAT_INC(dict_materialized_too_big);
50595079
}
50605080
else {
@@ -5077,11 +5097,11 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values,
50775097
PyErr_SetObject(PyExc_AttributeError, name);
50785098
return -1;
50795099
}
5080-
values->mv_order = (values->mv_order << 4) | ix;
5100+
_PyDictValues_AddToInsertionOrder(values, ix);
50815101
}
50825102
else {
50835103
if (value == NULL) {
5084-
values->mv_order = delete_index_from_order(values->mv_order, ix);
5104+
delete_index_from_values(values, ix);
50855105
}
50865106
Py_DECREF(old_value);
50875107
}

Python/ceval.c

+2-3
Original file line numberDiff line numberDiff line change
@@ -3536,14 +3536,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
35363536
PyDictValues *values = *_PyObject_ValuesPointer(owner);
35373537
DEOPT_IF(values == NULL, STORE_ATTR);
35383538
STAT_INC(STORE_ATTR, hit);
3539-
int index = cache0->index;
3539+
Py_ssize_t index = cache0->index;
35403540
STACK_SHRINK(1);
35413541
PyObject *value = POP();
35423542
PyObject *old_value = values->values[index];
35433543
values->values[index] = value;
35443544
if (old_value == NULL) {
3545-
assert(index < 16);
3546-
values->mv_order = (values->mv_order << 4) | index;
3545+
_PyDictValues_AddToInsertionOrder(values, index);
35473546
}
35483547
else {
35493548
Py_DECREF(old_value);

0 commit comments

Comments
 (0)