Skip to content

Commit 7c6e970

Browse files
authored
bpo-40602: Optimize _Py_hashtable for pointer keys (GH-20051)
Optimize _Py_hashtable_get() and _Py_hashtable_get_entry() for pointer keys:
* key_size == sizeof(void*)
* hash_func == _Py_hashtable_hash_ptr
* compare_func == _Py_hashtable_compare_direct

Changes:
* Add get_func and get_entry_func members to _Py_hashtable_t.
* Convert the _Py_hashtable_get() and _Py_hashtable_get_entry() functions to static inline functions.
* Add specialized get and get_entry implementations for pointer keys.
1 parent 74ea6b5 commit 7c6e970

File tree

2 files changed

+153
-94
lines changed

2 files changed

+153
-94
lines changed

Include/internal/pycore_hashtable.h

+25-15
Original file line number | Diff line number | Diff line change
@@ -76,12 +76,17 @@ typedef struct {
7676

7777
/* Forward declaration */
7878
struct _Py_hashtable_t;
79+
typedef struct _Py_hashtable_t _Py_hashtable_t;
7980

80-
typedef Py_uhash_t (*_Py_hashtable_hash_func) (struct _Py_hashtable_t *ht,
81+
typedef Py_uhash_t (*_Py_hashtable_hash_func) (_Py_hashtable_t *ht,
8182
const void *pkey);
82-
typedef int (*_Py_hashtable_compare_func) (struct _Py_hashtable_t *ht,
83+
typedef int (*_Py_hashtable_compare_func) (_Py_hashtable_t *ht,
8384
const void *pkey,
8485
const _Py_hashtable_entry_t *he);
86+
typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t *ht,
87+
const void *pkey);
88+
typedef int (*_Py_hashtable_get_func) (_Py_hashtable_t *ht,
89+
const void *pkey, void *data);
8590

8691
typedef struct {
8792
/* allocate a memory block */
@@ -93,18 +98,19 @@ typedef struct {
9398

9499

95100
/* _Py_hashtable: table */
96-
97-
typedef struct _Py_hashtable_t {
101+
struct _Py_hashtable_t {
98102
size_t num_buckets;
99103
size_t entries; /* Total number of entries in the table. */
100104
_Py_slist_t *buckets;
101105
size_t key_size;
102106
size_t data_size;
103107

108+
_Py_hashtable_get_func get_func;
109+
_Py_hashtable_get_entry_func get_entry_func;
104110
_Py_hashtable_hash_func hash_func;
105111
_Py_hashtable_compare_func compare_func;
106112
_Py_hashtable_allocator_t alloc;
107-
} _Py_hashtable_t;
113+
};
108114

109115
/* hash a pointer (void*) */
110116
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(
@@ -176,10 +182,12 @@ PyAPI_FUNC(int) _Py_hashtable_set(
176182
177183
Don't call this function directly; use the _Py_HASHTABLE_GET_ENTRY()
178184
macro */
179-
PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
180-
_Py_hashtable_t *ht,
181-
size_t key_size,
182-
const void *pkey);
185+
static inline _Py_hashtable_entry_t *
186+
_Py_hashtable_get_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
187+
{
188+
assert(key_size == ht->key_size);
189+
return ht->get_entry_func(ht, pkey);
190+
}
183191

184192
#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \
185193
_Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY))
@@ -189,12 +197,14 @@ PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
189197
exists, return 0 if the entry does not exist.
190198
191199
Don't call this function directly; use the _Py_HASHTABLE_GET() macro */
192-
PyAPI_FUNC(int) _Py_hashtable_get(
193-
_Py_hashtable_t *ht,
194-
size_t key_size,
195-
const void *pkey,
196-
size_t data_size,
197-
void *data);
200+
static inline int
201+
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
202+
size_t data_size, void *data)
203+
{
204+
assert(key_size == ht->key_size);
205+
assert(data_size == ht->data_size);
206+
return ht->get_func(ht, pkey, data);
207+
}
198208

199209
#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
200210
_Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))

Python/hashtable.c

+128-79
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,6 @@ Py_uhash_t
108108
_Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
109109
{
110110
void *key;
111-
112111
_Py_HASHTABLE_READ_KEY(ht, pkey, key);
113112
return (Py_uhash_t)_Py_HashPointer(key);
114113
}
@@ -137,61 +136,6 @@ round_size(size_t s)
137136
}
138137

139138

140-
_Py_hashtable_t *
141-
_Py_hashtable_new_full(size_t key_size, size_t data_size,
142-
size_t init_size,
143-
_Py_hashtable_hash_func hash_func,
144-
_Py_hashtable_compare_func compare_func,
145-
_Py_hashtable_allocator_t *allocator)
146-
{
147-
_Py_hashtable_t *ht;
148-
size_t buckets_size;
149-
_Py_hashtable_allocator_t alloc;
150-
151-
if (allocator == NULL) {
152-
alloc.malloc = PyMem_Malloc;
153-
alloc.free = PyMem_Free;
154-
}
155-
else {
156-
alloc = *allocator;
157-
}
158-
159-
ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
160-
if (ht == NULL)
161-
return ht;
162-
163-
ht->num_buckets = round_size(init_size);
164-
ht->entries = 0;
165-
ht->key_size = key_size;
166-
ht->data_size = data_size;
167-
168-
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
169-
ht->buckets = alloc.malloc(buckets_size);
170-
if (ht->buckets == NULL) {
171-
alloc.free(ht);
172-
return NULL;
173-
}
174-
memset(ht->buckets, 0, buckets_size);
175-
176-
ht->hash_func = hash_func;
177-
ht->compare_func = compare_func;
178-
ht->alloc = alloc;
179-
return ht;
180-
}
181-
182-
183-
_Py_hashtable_t *
184-
_Py_hashtable_new(size_t key_size, size_t data_size,
185-
_Py_hashtable_hash_func hash_func,
186-
_Py_hashtable_compare_func compare_func)
187-
{
188-
return _Py_hashtable_new_full(key_size, data_size,
189-
HASHTABLE_MIN_SIZE,
190-
hash_func, compare_func,
191-
NULL);
192-
}
193-
194-
195139
size_t
196140
_Py_hashtable_size(_Py_hashtable_t *ht)
197141
{
@@ -251,23 +195,20 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht)
251195

252196

253197
_Py_hashtable_entry_t *
254-
_Py_hashtable_get_entry(_Py_hashtable_t *ht,
255-
size_t key_size, const void *pkey)
198+
_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *pkey)
256199
{
257-
Py_uhash_t key_hash;
258-
size_t index;
259-
_Py_hashtable_entry_t *entry;
260-
261-
assert(key_size == ht->key_size);
262-
263-
key_hash = ht->hash_func(ht, pkey);
264-
index = key_hash & (ht->num_buckets - 1);
265-
266-
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
267-
if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
200+
Py_uhash_t key_hash = ht->hash_func(ht, pkey);
201+
size_t index = key_hash & (ht->num_buckets - 1);
202+
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
203+
while (1) {
204+
if (entry == NULL) {
205+
return NULL;
206+
}
207+
if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) {
268208
break;
209+
}
210+
entry = ENTRY_NEXT(entry);
269211
}
270-
271212
return entry;
272213
}
273214

@@ -324,7 +265,7 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
324265
/* Don't write the assertion on a single line because it is interesting
325266
to know the duplicated entry if the assertion failed. The entry can
326267
be read using a debugger. */
327-
entry = _Py_hashtable_get_entry(ht, key_size, pkey);
268+
entry = ht->get_entry_func(ht, pkey);
328269
assert(entry == NULL);
329270
#endif
330271

@@ -352,18 +293,62 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
352293

353294

354295
int
355-
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
356-
size_t data_size, void *data)
296+
_Py_hashtable_get_generic(_Py_hashtable_t *ht, const void *pkey, void *data)
357297
{
358-
_Py_hashtable_entry_t *entry;
359-
360298
assert(data != NULL);
299+
_Py_hashtable_entry_t *entry = ht->get_entry_func(ht, pkey);
300+
if (entry != NULL) {
301+
ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
302+
return 1;
303+
}
304+
else {
305+
return 0;
306+
}
307+
}
361308

362-
entry = _Py_hashtable_get_entry(ht, key_size, pkey);
363-
if (entry == NULL)
309+
310+
// Specialized for:
311+
// key_size == sizeof(void*)
312+
// hash_func == _Py_hashtable_hash_ptr
313+
// compare_func == _Py_hashtable_compare_direct
314+
_Py_hashtable_entry_t *
315+
_Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *pkey)
316+
{
317+
Py_uhash_t key_hash = _Py_hashtable_hash_ptr(ht, pkey);
318+
size_t index = key_hash & (ht->num_buckets - 1);
319+
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
320+
while (1) {
321+
if (entry == NULL) {
322+
return NULL;
323+
}
324+
if (entry->key_hash == key_hash) {
325+
const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
326+
if (memcmp(pkey, pkey2, sizeof(void*)) == 0) {
327+
break;
328+
}
329+
}
330+
entry = ENTRY_NEXT(entry);
331+
}
332+
return entry;
333+
}
334+
335+
336+
// Specialized for:
337+
// key_size == sizeof(void*)
338+
// hash_func == _Py_hashtable_hash_ptr
339+
// compare_func == _Py_hashtable_compare_direct
340+
int
341+
_Py_hashtable_get_ptr(_Py_hashtable_t *ht, const void *pkey, void *data)
342+
{
343+
assert(data != NULL);
344+
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry_ptr(ht, pkey);
345+
if (entry != NULL) {
346+
ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
347+
return 1;
348+
}
349+
else {
364350
return 0;
365-
ENTRY_READ_PDATA(ht, entry, data_size, data);
366-
return 1;
351+
}
367352
}
368353

369354

@@ -454,6 +439,70 @@ hashtable_rehash(_Py_hashtable_t *ht)
454439
}
455440

456441

442+
_Py_hashtable_t *
443+
_Py_hashtable_new_full(size_t key_size, size_t data_size,
444+
size_t init_size,
445+
_Py_hashtable_hash_func hash_func,
446+
_Py_hashtable_compare_func compare_func,
447+
_Py_hashtable_allocator_t *allocator)
448+
{
449+
_Py_hashtable_t *ht;
450+
size_t buckets_size;
451+
_Py_hashtable_allocator_t alloc;
452+
453+
if (allocator == NULL) {
454+
alloc.malloc = PyMem_Malloc;
455+
alloc.free = PyMem_Free;
456+
}
457+
else {
458+
alloc = *allocator;
459+
}
460+
461+
ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
462+
if (ht == NULL)
463+
return ht;
464+
465+
ht->num_buckets = round_size(init_size);
466+
ht->entries = 0;
467+
ht->key_size = key_size;
468+
ht->data_size = data_size;
469+
470+
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
471+
ht->buckets = alloc.malloc(buckets_size);
472+
if (ht->buckets == NULL) {
473+
alloc.free(ht);
474+
return NULL;
475+
}
476+
memset(ht->buckets, 0, buckets_size);
477+
478+
ht->get_func = _Py_hashtable_get_generic;
479+
ht->get_entry_func = _Py_hashtable_get_entry_generic;
480+
ht->hash_func = hash_func;
481+
ht->compare_func = compare_func;
482+
ht->alloc = alloc;
483+
if (ht->key_size == sizeof(void*)
484+
&& ht->hash_func == _Py_hashtable_hash_ptr
485+
&& ht->compare_func == _Py_hashtable_compare_direct)
486+
{
487+
ht->get_func = _Py_hashtable_get_ptr;
488+
ht->get_entry_func = _Py_hashtable_get_entry_ptr;
489+
}
490+
return ht;
491+
}
492+
493+
494+
_Py_hashtable_t *
495+
_Py_hashtable_new(size_t key_size, size_t data_size,
496+
_Py_hashtable_hash_func hash_func,
497+
_Py_hashtable_compare_func compare_func)
498+
{
499+
return _Py_hashtable_new_full(key_size, data_size,
500+
HASHTABLE_MIN_SIZE,
501+
hash_func, compare_func,
502+
NULL);
503+
}
504+
505+
457506
void
458507
_Py_hashtable_clear(_Py_hashtable_t *ht)
459508
{

0 commit comments

Comments
 (0)