Skip to content

bpo-46675: Allow object value arrays and split key dictionaries larger than 16 #31191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions Include/internal/pycore_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,17 @@ struct _dictkeysobject {
see the DK_ENTRIES() macro */
};

/* This must be no more than 16, for the order vector to fit in 64 bits */
#define SHARED_KEYS_MAX_SIZE 16

/* This must be no more than 250, for the prefix size to fit in one byte. */
#define SHARED_KEYS_MAX_SIZE 30
#define NEXT_LOG2_SHARED_KEYS_MAX_SIZE 6

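/* Layout of dict values:
*
* The PyObject *values are preceded by an array of bytes (the "prefix")
* holding the insertion order and size. Indexing bytes backwards from the
* start of the values array:
* [-1] = prefix size. [-2] = used size (number of recorded insertions).
* [-3-n] = index of the n-th inserted value, for n = 0 .. used size - 1.
*/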
struct _dictvalues {
uint64_t mv_order;
PyObject *values[1];
};

Expand Down Expand Up @@ -131,6 +137,18 @@ extern uint64_t _pydict_global_version;

PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values);

/* Append slot index `ix` to the insertion-order record kept in front of
 * `values`.
 *
 * The bytes just before the values array form a prefix: byte [-1] holds the
 * prefix size, byte [-2] holds the number of recorded insertions, and the
 * order entries grow downwards starting at byte [-3].  This writes `ix` as
 * the next order entry and increments the recorded count by one.
 */
static inline void
_PyDictValues_AddToInsertionOrder(PyDictValues *values, Py_ssize_t ix)
{
    assert(ix < SHARED_KEYS_MAX_SIZE);
    uint8_t *prefix_end = (uint8_t *)values;
    uint8_t *count_ptr = &prefix_end[-2];
    int used = count_ptr[0];
    /* The additional entry must still fit inside the prefix. */
    assert(used + 2 < prefix_end[-1]);
    int new_used = used + 1;
    /* Entries are addressed relative to the count byte, going backwards. */
    count_ptr[-new_used] = (uint8_t)ix;
    count_ptr[0] = new_used;
}

#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_descr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5505,7 +5505,7 @@ class B(A):
pass

#Shrink keys by repeatedly creating instances
[(A(), B()) for _ in range(20)]
[(A(), B()) for _ in range(30)]

a, b = A(), B()
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
Expand Down
9 changes: 5 additions & 4 deletions Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1504,15 +1504,16 @@ def delx(self): del self.__x
'6P')
class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 32 + 21*calcsize("n2P"))
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
# dict with shared keys
check(newstyleclass().__dict__, size('nQ2P') + 15*self.P)
[newstyleclass() for _ in range(100)]
check(newstyleclass().__dict__, size('nQ2P') + self.P)
o = newstyleclass()
o.a = o.b = o.c = o.d = o.e = o.f = o.g = o.h = 1
# Separate block for PyDictKeysObject with 16 keys and 10 entries
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 32 + 21*calcsize("n2P"))
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
# dict with shared keys
check(newstyleclass().__dict__, size('nQ2P') + 13*self.P)
check(newstyleclass().__dict__, size('nQ2P') + self.P)
# unicode
# each tuple contains a string and its expected character size
# don't put any static strings here, as they may contain
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Allow more than 16 items in a split dict before it is combined. The limit is
now 254.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

98 changes: 59 additions & 39 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -454,8 +454,14 @@ static PyDictKeysObject empty_keys_struct = {
};


static PyDictValues empty_values_struct = { 0, { NULL }};
#define empty_values (&empty_values_struct)
/* Statically allocated values array with no entries.
 *
 * A values array is addressed together with the bytes that precede it, so
 * this object supplies a pointer-sized prefix in front of `values`.  Only
 * the last prefix byte (the one read as ((uint8_t *)values)[-1], i.e. the
 * prefix size) is set explicitly; every other prefix byte, including the
 * used-size byte at [-2], is zero-initialised.
 *
 * NOTE(review): given internal linkage, matching the `static` on the
 * declaration this replaces -- confirm no other translation unit refers to
 * this symbol.
 */
static struct {
    uint8_t prefix[sizeof(PyObject *)];
    PyDictValues values;
} empty_values_struct = {
    { [sizeof(PyObject *)-1] = sizeof(PyObject *) },
    {{NULL}}
};
#define empty_values (&empty_values_struct.values)

#define Py_EMPTY_KEYS &empty_keys_struct

Expand All @@ -471,9 +477,9 @@ static PyDictValues empty_values_struct = { 0, { NULL }};
static inline int
get_index_from_order(PyDictObject *mp, Py_ssize_t i)
{
assert(mp->ma_used <= 16);
int shift = (int)(mp->ma_used-1-i)*4;
return (int)(mp->ma_values->mv_order >> shift) & 15;
assert(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
assert(i < (((char *)mp->ma_values)[-2]));
return ((char *)mp->ma_values)[-3-i];
}

int
Expand Down Expand Up @@ -637,11 +643,25 @@ free_keys_object(PyDictKeysObject *keys)
static inline PyDictValues*
new_values(Py_ssize_t size)
{
Py_ssize_t n = sizeof(PyDictValues) + sizeof(PyObject *) * (size-1);
return (PyDictValues*)PyMem_Malloc(n);
assert(size > 0);
size_t prefix_size = _Py_SIZE_ROUND_UP(size+2, sizeof(PyObject *));
assert(prefix_size < 256);
size_t n = prefix_size + size * sizeof(PyObject *);
uint8_t *mem = PyMem_Malloc(n);
if (mem == NULL) {
return NULL;
}
assert(prefix_size % sizeof(PyObject *) == 0);
mem[prefix_size-1] = (uint8_t)prefix_size;
return (PyDictValues*)(mem + prefix_size);
}

#define free_values(values) PyMem_Free(values)
/* Release the memory block that backs `values`.
 *
 * The byte immediately before the array (values[-1]) stores the size of the
 * prefix that precedes it, so the pointer passed to PyMem_Free() is `values`
 * moved back by that many bytes.
 */
static inline void
free_values(PyDictValues *values)
{
    uint8_t *array_start = (uint8_t *)values;
    const int prefix_len = array_start[-1];
    PyMem_Free(array_start - prefix_len);
}

/* Consumes a reference to the keys object */
static PyObject *
Expand Down Expand Up @@ -700,7 +720,7 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
dictkeys_decref(keys);
return PyErr_NoMemory();
}
values->mv_order = 0;
((char *)values)[-2] = 0;
for (i = 0; i < size; i++) {
values->values[i] = NULL;
}
Expand Down Expand Up @@ -1018,7 +1038,7 @@ insertion_resize(PyDictObject *mp)
return dictresize(mp, calculate_log2_keysize(GROWTH_RATE(mp)));
}

static int
static Py_ssize_t
insert_into_dictkeys(PyDictKeysObject *keys, PyObject *name)
{
assert(PyUnicode_CheckExact(name));
Expand Down Expand Up @@ -1049,7 +1069,7 @@ insert_into_dictkeys(PyDictKeysObject *keys, PyObject *name)
keys->dk_nentries++;
}
assert (ix < SHARED_KEYS_MAX_SIZE);
return (int)ix;
return ix;
}

/*
Expand Down Expand Up @@ -1094,9 +1114,7 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
ep->me_hash = hash;
if (mp->ma_values) {
Py_ssize_t index = mp->ma_keys->dk_nentries;
assert(index < SHARED_KEYS_MAX_SIZE);
assert((mp->ma_values->mv_order >> 60) == 0);
mp->ma_values->mv_order = ((mp->ma_values->mv_order)<<4) | index;
_PyDictValues_AddToInsertionOrder(mp->ma_values, index);
assert (mp->ma_values->values[index] == NULL);
mp->ma_values->values[index] = value;
}
Expand All @@ -1116,7 +1134,7 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
if (_PyDict_HasSplitTable(mp)) {
mp->ma_values->values[ix] = value;
if (old_value == NULL) {
mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | ix;
_PyDictValues_AddToInsertionOrder(mp->ma_values, ix);
mp->ma_used++;
}
}
Expand Down Expand Up @@ -1599,19 +1617,20 @@ _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value,
return insertdict(mp, key, hash, value);
}

static uint64_t
delete_index_from_order(uint64_t order, Py_ssize_t ix)
{ /* Update order */
for (int i = 0;; i+= 4) {
assert (i < 64);
if (((order >> i) & 15) == (uint64_t)ix) {
/* Remove 4 bits at ith position */
uint64_t high = ((order>>i)>>4)<<i;
uint64_t low = order & ((((uint64_t)1)<<i)-1);
return high | low;
}
static void
delete_index_from_values(PyDictValues *values, Py_ssize_t ix)
{
uint8_t *size_ptr = ((uint8_t *)values)-2;
int size = *size_ptr;
int i;
for (i = 1; size_ptr[-i] != ix; i++) {
assert(i <= size);
}
Py_UNREACHABLE();
assert(i <= size);
for (; i < size; i++) {
size_ptr[-i] = size_ptr[-i-1];
}
*size_ptr = size -1;
}

static int
Expand All @@ -1632,8 +1651,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
mp->ma_values->values[ix] = NULL;
assert(ix < SHARED_KEYS_MAX_SIZE);
/* Update order */
mp->ma_values->mv_order =
delete_index_from_order(mp->ma_values->mv_order, ix);
delete_index_from_values(mp->ma_values, ix);
ASSERT_CONSISTENT(mp);
}
else {
Expand Down Expand Up @@ -2730,7 +2748,8 @@ PyDict_Copy(PyObject *o)
free_values(newvalues);
return NULL;
}
newvalues->mv_order = mp->ma_values->mv_order;
size_t prefix_size = ((uint8_t *)newvalues)[-1];
memcpy(((char *)newvalues)-prefix_size, ((char *)mp->ma_values)-prefix_size, prefix_size-1);
split_copy->ma_values = newvalues;
split_copy->ma_keys = mp->ma_keys;
split_copy->ma_used = mp->ma_used;
Expand Down Expand Up @@ -3032,11 +3051,11 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
ep->me_key = key;
ep->me_hash = hash;
if (_PyDict_HasSplitTable(mp)) {
int index = (int)mp->ma_keys->dk_nentries;
Py_ssize_t index = (int)mp->ma_keys->dk_nentries;
assert(index < SHARED_KEYS_MAX_SIZE);
assert(mp->ma_values->values[index] == NULL);
mp->ma_values->values[index] = value;
mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | index;
_PyDictValues_AddToInsertionOrder(mp->ma_values, index);
}
else {
ep->me_value = value;
Expand All @@ -3054,7 +3073,7 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
Py_INCREF(value);
MAINTAIN_TRACKING(mp, key, value);
mp->ma_values->values[ix] = value;
mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | ix;
_PyDictValues_AddToInsertionOrder(mp->ma_values, ix);
mp->ma_used++;
mp->ma_version_tag = DICT_NEXT_VERSION();
}
Expand Down Expand Up @@ -4942,7 +4961,7 @@ dictvalues_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored))
PyDictKeysObject *
_PyDict_NewKeysForClass(void)
{
PyDictKeysObject *keys = new_keys_object(5); /* log2(32) */
PyDictKeysObject *keys = new_keys_object(NEXT_LOG2_SHARED_KEYS_MAX_SIZE);
if (keys == NULL) {
PyErr_Clear();
}
Expand Down Expand Up @@ -4975,7 +4994,8 @@ init_inline_values(PyObject *obj, PyTypeObject *tp)
PyErr_NoMemory();
return -1;
}
values->mv_order = 0;
assert(((uint8_t *)values)[-1] >= size+2);
((uint8_t *)values)[-2] = 0;
for (int i = 0; i < size; i++) {
values->values[i] = NULL;
}
Expand Down Expand Up @@ -5048,14 +5068,14 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values,
assert(keys != NULL);
assert(values != NULL);
assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
int ix = insert_into_dictkeys(keys, name);
Py_ssize_t ix = insert_into_dictkeys(keys, name);
if (ix == DKIX_EMPTY) {
if (value == NULL) {
PyErr_SetObject(PyExc_AttributeError, name);
return -1;
}
#ifdef Py_STATS
if (shared_keys_usable_size(keys) > 14) {
if (shared_keys_usable_size(keys) == SHARED_KEYS_MAX_SIZE) {
OBJECT_STAT_INC(dict_materialized_too_big);
}
else {
Expand All @@ -5078,11 +5098,11 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values,
PyErr_SetObject(PyExc_AttributeError, name);
return -1;
}
values->mv_order = (values->mv_order << 4) | ix;
_PyDictValues_AddToInsertionOrder(values, ix);
}
else {
if (value == NULL) {
values->mv_order = delete_index_from_order(values->mv_order, ix);
delete_index_from_values(values, ix);
}
Py_DECREF(old_value);
}
Expand Down
5 changes: 2 additions & 3 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -3573,14 +3573,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
PyDictValues *values = *_PyObject_ValuesPointer(owner);
DEOPT_IF(values == NULL, STORE_ATTR);
STAT_INC(STORE_ATTR, hit);
int index = cache0->index;
Py_ssize_t index = cache0->index;
STACK_SHRINK(1);
PyObject *value = POP();
PyObject *old_value = values->values[index];
values->values[index] = value;
if (old_value == NULL) {
assert(index < 16);
values->mv_order = (values->mv_order << 4) | index;
_PyDictValues_AddToInsertionOrder(values, index);
}
else {
Py_DECREF(old_value);
Expand Down