Skip to content

Commit 8b604d1

Browse files
jeffhostetlergitster
authored andcommitted
hashmap: add API to disable item counting when threaded
This is to address concerns raised by ThreadSanitizer on the mailing list about threaded unprotected R/W access to map.size with my previous "disallow rehash" change (0607e10). See: https://public-inbox.org/git/adb37b70139fd1e2bac18bfd22c8b96683ae18eb.1502780344.git.martin.agren@gmail.com/ Add API to hashmap to disable item counting and thus automatic rehashing. Also include API to later re-enable them. When item counting is disabled, the map.size field is invalid. So to prevent accidents, the field has been renamed and an accessor function hashmap_get_size() has been added. All direct references to this field have been been updated. And the name of the field changed to map.private_size to communicate this. Here is the relevant output from ThreadSanitizer showing the problem: WARNING: ThreadSanitizer: data race (pid=10554) Read of size 4 at 0x00000082d488 by thread T2 (mutexes: write M16): #0 hashmap_add hashmap.c:209 #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302 #2 handle_range_dir name-hash.c:347 #3 handle_range_1 name-hash.c:415 #4 lazy_dir_thread_proc name-hash.c:471 #5 <null> <null> Previous write of size 4 at 0x00000082d488 by thread T1 (mutexes: write M31): #0 hashmap_add hashmap.c:209 #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302 #2 handle_range_dir name-hash.c:347 #3 handle_range_1 name-hash.c:415 #4 handle_range_dir name-hash.c:380 #5 handle_range_1 name-hash.c:415 #6 lazy_dir_thread_proc name-hash.c:471 #7 <null> <null> Martin gives instructions for running TSan on test t3008 in this post: https://public-inbox.org/git/CAN0heSoJDL9pWELD6ciLTmWf-a=oyxe4EXXOmCKvsG5MSuzxsA@mail.gmail.com/ Signed-off-by: Jeff Hostetler <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 238e487 commit 8b604d1

File tree

6 files changed

+88
-40
lines changed

6 files changed

+88
-40
lines changed

attr.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,12 @@ struct all_attrs_item {
151151
static void all_attrs_init(struct attr_hashmap *map, struct attr_check *check)
152152
{
153153
int i;
154+
unsigned int size;
154155

155156
hashmap_lock(map);
156157

157-
if (map->map.size < check->all_attrs_nr)
158+
size = hashmap_get_size(&map->map);
159+
if (size < check->all_attrs_nr)
158160
die("BUG: interned attributes shouldn't be deleted");
159161

160162
/*
@@ -163,13 +165,13 @@ static void all_attrs_init(struct attr_hashmap *map, struct attr_check *check)
163165
* field), reallocate the provided attr_check instance's all_attrs
164166
* field and fill each entry with its corresponding git_attr.
165167
*/
166-
if (map->map.size != check->all_attrs_nr) {
168+
if (size != check->all_attrs_nr) {
167169
struct attr_hash_entry *e;
168170
struct hashmap_iter iter;
169171
hashmap_iter_init(&map->map, &iter);
170172

171-
REALLOC_ARRAY(check->all_attrs, map->map.size);
172-
check->all_attrs_nr = map->map.size;
173+
REALLOC_ARRAY(check->all_attrs, size);
174+
check->all_attrs_nr = size;
173175

174176
while ((e = hashmap_iter_next(&iter))) {
175177
const struct git_attr *a = e->value;
@@ -237,10 +239,11 @@ static const struct git_attr *git_attr_internal(const char *name, int namelen)
237239

238240
if (!a) {
239241
FLEX_ALLOC_MEM(a, name, name, namelen);
240-
a->attr_nr = g_attr_hashmap.map.size;
242+
a->attr_nr = hashmap_get_size(&g_attr_hashmap.map);
241243

242244
attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a);
243-
assert(a->attr_nr == (g_attr_hashmap.map.size - 1));
245+
assert(a->attr_nr ==
246+
(hashmap_get_size(&g_attr_hashmap.map) - 1));
244247
}
245248

246249
hashmap_unlock(&g_attr_hashmap);

builtin/describe.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
508508

509509
hashmap_init(&names, commit_name_cmp, NULL, 0);
510510
for_each_rawref(get_name, NULL);
511-
if (!names.size && !always)
511+
if (!hashmap_get_size(&names) && !always)
512512
die(_("No names found, cannot describe anything."));
513513

514514
if (argc == 0) {

hashmap.c

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,6 @@ static void rehash(struct hashmap *map, unsigned int newsize)
116116
unsigned int i, oldsize = map->tablesize;
117117
struct hashmap_entry **oldtable = map->table;
118118

119-
if (map->disallow_rehash)
120-
return;
121-
122119
alloc_table(map, newsize);
123120
for (i = 0; i < oldsize; i++) {
124121
struct hashmap_entry *e = oldtable[i];
@@ -166,6 +163,12 @@ void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
166163
while (initial_size > size)
167164
size <<= HASHMAP_RESIZE_BITS;
168165
alloc_table(map, size);
166+
167+
/*
168+
* Keep track of the number of items in the map and
169+
* allow the map to automatically grow as necessary.
170+
*/
171+
map->do_count_items = 1;
169172
}
170173

171174
void hashmap_free(struct hashmap *map, int free_entries)
@@ -206,9 +209,11 @@ void hashmap_add(struct hashmap *map, void *entry)
206209
map->table[b] = entry;
207210

208211
/* fix size and rehash if appropriate */
209-
map->size++;
210-
if (map->size > map->grow_at)
211-
rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
212+
if (map->do_count_items) {
213+
map->private_size++;
214+
if (map->private_size > map->grow_at)
215+
rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
216+
}
212217
}
213218

214219
void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
@@ -224,9 +229,12 @@ void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
224229
old->next = NULL;
225230

226231
/* fix size and rehash if appropriate */
227-
map->size--;
228-
if (map->size < map->shrink_at)
229-
rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS);
232+
if (map->do_count_items) {
233+
map->private_size--;
234+
if (map->private_size < map->shrink_at)
235+
rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS);
236+
}
237+
230238
return old;
231239
}
232240

hashmap.h

Lines changed: 51 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ struct hashmap {
183183
const void *cmpfn_data;
184184

185185
/* total number of entries (0 means the hashmap is empty) */
186-
unsigned int size;
186+
unsigned int private_size; /* use hashmap_get_size() */
187187

188188
/*
189189
* tablesize is the allocated size of the hash table. A non-0 value
@@ -196,8 +196,7 @@ struct hashmap {
196196
unsigned int grow_at;
197197
unsigned int shrink_at;
198198

199-
/* See `hashmap_disallow_rehash`. */
200-
unsigned disallow_rehash : 1;
199+
unsigned int do_count_items : 1;
201200
};
202201

203202
/* hashmap functions */
@@ -252,6 +251,18 @@ static inline void hashmap_entry_init(void *entry, unsigned int hash)
252251
e->next = NULL;
253252
}
254253

254+
/*
255+
* Return the number of items in the map.
256+
*/
257+
static inline unsigned int hashmap_get_size(struct hashmap *map)
258+
{
259+
if (map->do_count_items)
260+
return map->private_size;
261+
262+
BUG("hashmap_get_size: size not set");
263+
return 0;
264+
}
265+
255266
/*
256267
* Returns the hashmap entry for the specified key, or NULL if not found.
257268
*
@@ -344,24 +355,6 @@ extern void *hashmap_remove(struct hashmap *map, const void *key,
344355
*/
345356
int hashmap_bucket(const struct hashmap *map, unsigned int hash);
346357

347-
/*
348-
* Disallow/allow rehashing of the hashmap.
349-
* This is useful if the caller knows that the hashmap needs multi-threaded
350-
* access. The caller is still required to guard/lock searches and inserts
351-
* in a manner appropriate to their usage. This simply prevents the table
352-
* from being unexpectedly re-mapped.
353-
*
354-
* It is up to the caller to ensure that the hashmap is initialized to a
355-
* reasonable size to prevent poor performance.
356-
*
357-
* A call to allow rehashing does not force a rehash; that might happen
358-
* with the next insert or delete.
359-
*/
360-
static inline void hashmap_disallow_rehash(struct hashmap *map, unsigned value)
361-
{
362-
map->disallow_rehash = value;
363-
}
364-
365358
/*
366359
* Used to iterate over all entries of a hashmap. Note that it is
367360
* not safe to add or remove entries to the hashmap while
@@ -387,6 +380,43 @@ static inline void *hashmap_iter_first(struct hashmap *map,
387380
return hashmap_iter_next(iter);
388381
}
389382

383+
/*
384+
* Disable item counting and automatic rehashing when adding/removing items.
385+
*
386+
* Normally, the hashmap keeps track of the number of items in the map
387+
* and uses it to dynamically resize it. This (both the counting and
388+
* the resizing) can cause problems when the map is being used by
389+
* threaded callers (because the hashmap code does not know about the
390+
* locking strategy used by the threaded callers and therefore, does
391+
* not know how to protect the "private_size" counter).
392+
*/
393+
static inline void hashmap_disable_item_counting(struct hashmap *map)
394+
{
395+
map->do_count_items = 0;
396+
}
397+
398+
/*
399+
* Re-enable item couting when adding/removing items.
400+
* If counting is currently disabled, it will force count them.
401+
* It WILL NOT automatically rehash them.
402+
*/
403+
static inline void hashmap_enable_item_counting(struct hashmap *map)
404+
{
405+
void *item;
406+
unsigned int n = 0;
407+
struct hashmap_iter iter;
408+
409+
if (map->do_count_items)
410+
return;
411+
412+
hashmap_iter_init(map, &iter);
413+
while ((item = hashmap_iter_next(&iter)))
414+
n++;
415+
416+
map->do_count_items = 1;
417+
map->private_size = n;
418+
}
419+
390420
/* String interning */
391421

392422
/*

name-hash.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -584,9 +584,15 @@ static void lazy_init_name_hash(struct index_state *istate)
584584
hashmap_init(&istate->dir_hash, dir_entry_cmp, NULL, istate->cache_nr);
585585

586586
if (lookup_lazy_params(istate)) {
587-
hashmap_disallow_rehash(&istate->dir_hash, 1);
587+
/*
588+
* Disable item counting and automatic rehashing because
589+
* we do per-chain (mod n) locking rather than whole hashmap
590+
* locking and we need to prevent the table-size from changing
591+
* and bucket items from being redistributed.
592+
*/
593+
hashmap_disable_item_counting(&istate->dir_hash);
588594
threaded_lazy_init_name_hash(istate);
589-
hashmap_disallow_rehash(&istate->dir_hash, 0);
595+
hashmap_enable_item_counting(&istate->dir_hash);
590596
} else {
591597
int nr;
592598
for (nr = 0; nr < istate->cache_nr; nr++)

t/helper/test-hashmap.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,8 @@ int cmd_main(int argc, const char **argv)
235235
} else if (!strcmp("size", cmd)) {
236236

237237
/* print table sizes */
238-
printf("%u %u\n", map.tablesize, map.size);
238+
printf("%u %u\n", map.tablesize,
239+
hashmap_get_size(&map));
239240

240241
} else if (!strcmp("intern", cmd) && l1) {
241242

0 commit comments

Comments
 (0)