Skip to content

bpo-40116: Add insertion order bit-vector to dict values to allow dicts to share keys more freely. #28520

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Include/cpython/dictobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#endif

typedef struct _dictkeysobject PyDictKeysObject;
typedef struct _dictvalues PyDictValues;

/* The ma_values pointer is NULL for a combined table
* or points to an array of PyObject* for a split table
Expand All @@ -24,7 +25,7 @@ typedef struct {

If ma_values is not NULL, the table is splitted:
keys are stored in ma_keys and values are stored in ma_values */
PyObject **ma_values;
PyDictValues *ma_values;
} PyDictObject;

PyAPI_FUNC(PyObject *) _PyDict_GetItem_KnownHash(PyObject *mp, PyObject *key,
Expand Down
8 changes: 8 additions & 0 deletions Include/internal/pycore_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ struct _dictkeysobject {
see the DK_ENTRIES() macro */
};

/* This must be no more than 16, for the order vector to fit in 64 bits */
#define SHARED_KEYS_MAX_SIZE 16

struct _dictvalues {
uint64_t mv_order;
PyObject *values[1];
};

#define DK_LOG_SIZE(dk) ((dk)->dk_log2_size)
#if SIZEOF_VOID_P > 4
#define DK_SIZE(dk) (((int64_t)1)<<DK_LOG_SIZE(dk))
Expand Down
37 changes: 1 addition & 36 deletions Lib/test/test_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,6 @@ def test_splittable_del(self):
with self.assertRaises(KeyError):
del a['y']

self.assertGreater(sys.getsizeof(a), orig_size)
self.assertEqual(list(a), ['x', 'z'])
self.assertEqual(list(b), ['x', 'y', 'z'])

Expand All @@ -1031,16 +1030,12 @@ def test_splittable_del(self):

@support.cpython_only
def test_splittable_pop(self):
"""split table must be combined when d.pop(k)"""
a, b = self.make_shared_key_dict(2)

orig_size = sys.getsizeof(a)

a.pop('y') # split table is combined
a.pop('y')
with self.assertRaises(KeyError):
a.pop('y')

self.assertGreater(sys.getsizeof(a), orig_size)
self.assertEqual(list(a), ['x', 'z'])
self.assertEqual(list(b), ['x', 'y', 'z'])

Expand Down Expand Up @@ -1074,36 +1069,6 @@ def test_splittable_popitem(self):
self.assertEqual(list(a), ['x', 'y'])
self.assertEqual(list(b), ['x', 'y', 'z'])

@support.cpython_only
def test_splittable_setattr_after_pop(self):
"""setattr() must not convert combined table into split table."""
# Issue 28147
import _testcapi

class C:
pass
a = C()

a.a = 1
self.assertTrue(_testcapi.dict_hassplittable(a.__dict__))

# dict.pop() convert it to combined table
a.__dict__.pop('a')
self.assertFalse(_testcapi.dict_hassplittable(a.__dict__))

# But C should not convert a.__dict__ to split table again.
a.a = 1
self.assertFalse(_testcapi.dict_hassplittable(a.__dict__))

# Same for popitem()
a = C()
a.a = 2
self.assertTrue(_testcapi.dict_hassplittable(a.__dict__))
a.__dict__.popitem()
self.assertFalse(_testcapi.dict_hassplittable(a.__dict__))
a.a = 3
self.assertFalse(_testcapi.dict_hassplittable(a.__dict__))

def test_iterator_pickling(self):
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
data = {1:"a", 2:"b", 3:"c"}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Change to the implementation of split dictionaries. Classes where the
instances differ either in the exact set of attributes, or in the order in
which those attributes are set, can still share keys. This should have no
observable effect on users of Python or the C-API. Patch by Mark Shannon.
14 changes: 0 additions & 14 deletions Modules/_testcapimodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -334,19 +334,6 @@ dict_getitem_knownhash(PyObject *self, PyObject *args)
return result;
}

static PyObject*
dict_hassplittable(PyObject *self, PyObject *arg)
{
if (!PyDict_Check(arg)) {
PyErr_Format(PyExc_TypeError,
"dict_hassplittable() argument must be dict, not '%s'",
Py_TYPE(arg)->tp_name);
return NULL;
}

return PyBool_FromLong(_PyDict_HasSplitTable((PyDictObject*)arg));
}

/* Issue #4701: Check that PyObject_Hash implicitly calls
* PyType_Ready if it hasn't already been called
*/
Expand Down Expand Up @@ -5680,7 +5667,6 @@ static PyMethodDef TestMethods[] = {
{"test_list_api", test_list_api, METH_NOARGS},
{"test_dict_iteration", test_dict_iteration, METH_NOARGS},
{"dict_getitem_knownhash", dict_getitem_knownhash, METH_VARARGS},
{"dict_hassplittable", dict_hassplittable, METH_O},
{"test_lazy_hash_inheritance", test_lazy_hash_inheritance,METH_NOARGS},
{"test_long_api", test_long_api, METH_NOARGS},
{"test_xincref_doesnt_leak",test_xincref_doesnt_leak, METH_NOARGS},
Expand Down
Loading