Skip to content

Commit 0088371

Browse files
committed
Cleanup old dict lookup functions.
1 parent 730113d commit 0088371

File tree

2 files changed

+44
-136
lines changed

2 files changed

+44
-136
lines changed

Objects/dictobject.c

+42-134
Original file line numberDiff line numberDiff line change
@@ -227,14 +227,6 @@ equally good collision statistics, needed less code & used less memory.
227227
228228
*/
229229

230-
/* forward declarations */
231-
static Py_ssize_t lookdict(PyDictObject *mp, PyObject *key,
232-
Py_hash_t hash, PyObject **value_addr);
233-
static Py_ssize_t lookdict_unicode(PyDictObject *mp, PyObject *key,
234-
Py_hash_t hash, PyObject **value_addr);
235-
static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
236-
Py_hash_t hash, PyObject **value_addr);
237-
238230
static int dictresize(PyDictObject *mp, uint8_t log_newsize);
239231

240232
static PyObject* dict_iter(PyDictObject *dict);
@@ -774,34 +766,46 @@ probe indices are computed as explained earlier.
774766
775767
All arithmetic on hash should ignore overflow.
776768
777-
The details in this version are due to Tim Peters, building on many past
778-
contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
779-
Christian Tismer.
780-
781-
lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a
769+
_Py_dict_lookup() is general-purpose, and may return DKIX_ERROR if (and only if) a
782770
comparison raises an exception.
783-
lookdict_unicode() below is specialized to string keys, comparison of which can
784-
never raise an exception; that function can never return DKIX_ERROR when key
785-
is string. Otherwise, it falls back to lookdict().
786771
When the key isn't found a DKIX_EMPTY is returned.
787772
*/
788-
static Py_ssize_t _Py_HOT_FUNCTION
789-
lookdict(PyDictObject *mp, PyObject *key,
790-
Py_hash_t hash, PyObject **value_addr)
773+
Py_ssize_t _Py_HOT_FUNCTION
774+
_Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr)
791775
{
792-
size_t i, mask, perturb;
793776
PyDictKeysObject *dk;
794-
PyDictKeyEntry *ep0;
795-
796-
top:
777+
start:
797778
dk = mp->ma_keys;
798-
ep0 = DK_ENTRIES(dk);
799-
mask = DK_MASK(dk);
800-
perturb = hash;
801-
i = (size_t)hash & mask;
802-
779+
DictKeysKind kind = dk->dk_kind;
780+
PyDictKeyEntry *ep0 = DK_ENTRIES(dk);
781+
size_t mask = DK_MASK(dk);
782+
size_t perturb = hash;
783+
size_t i = (size_t)hash & mask;
784+
Py_ssize_t ix;
785+
if (PyUnicode_CheckExact(key) && kind != DICT_KEYS_GENERAL) {
786+
/* Strings only */
787+
for (;;) {
788+
ix = dictkeys_get_index(mp->ma_keys, i);
789+
if (ix == DKIX_EMPTY) {
790+
*value_addr = NULL;
791+
return DKIX_EMPTY;
792+
}
793+
if (ix >= 0) {
794+
PyDictKeyEntry *ep = &ep0[ix];
795+
assert(ep->me_key != NULL);
796+
assert(PyUnicode_CheckExact(ep->me_key));
797+
if (ep->me_key == key ||
798+
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
799+
goto found;
800+
}
801+
}
802+
perturb >>= PERTURB_SHIFT;
803+
i = mask & (i*5 + perturb + 1);
804+
}
805+
Py_UNREACHABLE();
806+
}
803807
for (;;) {
804-
Py_ssize_t ix = dictkeys_get_index(dk, i);
808+
ix = dictkeys_get_index(dk, i);
805809
if (ix == DKIX_EMPTY) {
806810
*value_addr = NULL;
807811
return ix;
@@ -810,8 +814,7 @@ lookdict(PyDictObject *mp, PyObject *key,
810814
PyDictKeyEntry *ep = &ep0[ix];
811815
assert(ep->me_key != NULL);
812816
if (ep->me_key == key) {
813-
*value_addr = ep->me_value;
814-
return ix;
817+
goto found;
815818
}
816819
if (ep->me_hash == hash) {
817820
PyObject *startkey = ep->me_key;
@@ -824,122 +827,27 @@ lookdict(PyDictObject *mp, PyObject *key,
824827
}
825828
if (dk == mp->ma_keys && ep->me_key == startkey) {
826829
if (cmp > 0) {
827-
*value_addr = ep->me_value;
828-
return ix;
830+
goto found;
829831
}
830832
}
831833
else {
832834
/* The dict was mutated, restart */
833-
goto top;
835+
goto start;
834836
}
835837
}
836838
}
837839
perturb >>= PERTURB_SHIFT;
838840
i = (i*5 + perturb + 1) & mask;
839841
}
840842
Py_UNREACHABLE();
841-
}
842-
843-
/* Specialized version for string-only keys */
844-
static Py_ssize_t _Py_HOT_FUNCTION
845-
lookdict_unicode(PyDictObject *mp, PyObject *key,
846-
Py_hash_t hash, PyObject **value_addr)
847-
{
848-
assert(mp->ma_values == NULL);
849-
/* Make sure this function doesn't have to handle non-unicode keys,
850-
including subclasses of str; e.g., one reason to subclass
851-
unicodes is to override __eq__, and for speed we don't cater to
852-
that here. */
853-
if (!PyUnicode_CheckExact(key)) {
854-
return lookdict(mp, key, hash, value_addr);
855-
}
856-
857-
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
858-
size_t mask = DK_MASK(mp->ma_keys);
859-
size_t perturb = (size_t)hash;
860-
size_t i = (size_t)hash & mask;
861-
862-
for (;;) {
863-
Py_ssize_t ix = dictkeys_get_index(mp->ma_keys, i);
864-
if (ix == DKIX_EMPTY) {
865-
*value_addr = NULL;
866-
return DKIX_EMPTY;
867-
}
868-
if (ix >= 0) {
869-
PyDictKeyEntry *ep = &ep0[ix];
870-
assert(ep->me_key != NULL);
871-
assert(PyUnicode_CheckExact(ep->me_key));
872-
if (ep->me_key == key ||
873-
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
874-
*value_addr = ep->me_value;
875-
return ix;
876-
}
877-
}
878-
perturb >>= PERTURB_SHIFT;
879-
i = mask & (i*5 + perturb + 1);
880-
}
881-
Py_UNREACHABLE();
882-
}
883-
884-
/* Version of lookdict for split tables.
885-
* All split tables and only split tables use this lookup function.
886-
* Split tables only contain unicode keys and no dummy keys,
887-
* so the algorithm is the same as lookdict_unicode, but it doesn't check for dummy keys.
888-
*/
889-
static Py_ssize_t _Py_HOT_FUNCTION
890-
lookdict_split(PyDictObject *mp, PyObject *key,
891-
Py_hash_t hash, PyObject **value_addr)
892-
{
893-
/* mp must be a split table */
894-
assert(mp->ma_values != NULL);
895-
if (!PyUnicode_CheckExact(key)) {
896-
Py_ssize_t ix = lookdict(mp, key, hash, value_addr);
897-
if (ix >= 0) {
898-
*value_addr = mp->ma_values[ix];
899-
}
900-
return ix;
843+
found:
844+
if (dk->dk_kind == DICT_KEYS_SPLIT) {
845+
*value_addr = mp->ma_values[ix];
901846
}
902-
903-
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
904-
size_t mask = DK_MASK(mp->ma_keys);
905-
size_t perturb = (size_t)hash;
906-
size_t i = (size_t)hash & mask;
907-
908-
for (;;) {
909-
Py_ssize_t ix = dictkeys_get_index(mp->ma_keys, i);
910-
assert (ix != DKIX_DUMMY);
911-
if (ix == DKIX_EMPTY) {
912-
*value_addr = NULL;
913-
return DKIX_EMPTY;
914-
}
915-
PyDictKeyEntry *ep = &ep0[ix];
916-
assert(ep->me_key != NULL);
917-
assert(PyUnicode_CheckExact(ep->me_key));
918-
if (ep->me_key == key ||
919-
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
920-
*value_addr = mp->ma_values[ix];
921-
return ix;
922-
}
923-
perturb >>= PERTURB_SHIFT;
924-
i = mask & (i*5 + perturb + 1);
847+
else {
848+
*value_addr = ep0[ix].me_value;
925849
}
926-
Py_UNREACHABLE();
927-
}
928-
929-
Py_ssize_t
930-
_Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr)
931-
{
932-
DictKeysKind kind = mp->ma_keys->dk_kind;
933-
switch (kind) {
934-
case DICT_KEYS_GENERAL:
935-
return lookdict(mp, key, hash, value_addr);
936-
case DICT_KEYS_UNICODE:
937-
case DICT_KEYS_UNICODE_NO_DUMMY:
938-
return lookdict_unicode(mp, key, hash, value_addr);
939-
case DICT_KEYS_SPLIT:
940-
return lookdict_split(mp, key, hash, value_addr);
941-
};
942-
Py_UNREACHABLE();
850+
return ix;
943851
}
944852

945853
int

Objects/odictobject.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ we've considered:
4141
The approach with the least performance impact (time and space) is #2,
4242
mirroring the key order of dict's dk_entries with an array of node pointers.
4343
While _Py_dict_lookup() does not give us the index into the array,
44-
we make use of pointer arithmetic to get that index. An alternative
45-
would be to refactor lookdict() to provide the index, explicitly exposing
44+
we make use of pointer arithmetic to get that index. An alternative would
45+
be to refactor _Py_dict_lookup() to provide the index, explicitly exposing
4646
the implementation detail. We could even just use a custom lookup function
4747
for OrderedDict that facilitates our need. However, both approaches are
4848
significantly more complicated than just using pointer arithmetic.

0 commit comments

Comments
 (0)