Skip to content

Commit 56ceb64

Browse files
committed
Merge branch 'mt/threaded-grep-in-object-store'
Traditionally, we avoided threaded grep while searching in objects (as opposed to files in the working tree) as accesses to the object layer is not thread-safe. This limitation is getting lifted. * mt/threaded-grep-in-object-store: grep: use no. of cores as the default no. of threads grep: move driver pre-load out of critical section grep: re-enable threads in non-worktree case grep: protect packed_git [re-]initialization grep: allow submodule functions to run in parallel submodule-config: add skip_if_read option to repo_read_gitmodules() grep: replace grep_read_mutex by internal obj read lock object-store: allow threaded access to object reading replace-object: make replace operations thread-safe grep: fix racy calls in grep_objects() grep: fix race conditions at grep_submodule() grep: fix race conditions on userdiff calls
2 parents 0da63da + f1928f0 commit 56ceb64

14 files changed

+233
-98
lines changed

.tsan-suppressions

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,9 @@
88
# in practice it (hopefully!) doesn't matter.
99
race:^want_color$
1010
race:^transfer_debug$
11+
12+
# A boolean value, which tells whether the replace_map has been initialized or
13+
# not, is read racily with an update. As this variable is written to only once,
14+
# and it's OK if the value change right after reading it, this shouldn't be a
15+
# problem.
16+
race:^lookup_replace_object$

Documentation/git-grep.txt

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ grep.extendedRegexp::
5959
other than 'default'.
6060

6161
grep.threads::
62-
Number of grep worker threads to use. If unset (or set to 0),
63-
8 threads are used by default (for now).
62+
Number of grep worker threads to use. If unset (or set to 0), Git will
63+
use as many threads as the number of logical cores available.
6464

6565
grep.fullName::
6666
If set to true, enable `--full-name` option by default.
@@ -348,6 +348,17 @@ EXAMPLES
348348
`git grep solution -- :^Documentation`::
349349
Looks for `solution`, excluding files in `Documentation`.
350350

351+
NOTES ON THREADS
352+
----------------
353+
354+
The `--threads` option (and the grep.threads configuration) will be ignored when
355+
`--open-files-in-pager` is used, forcing a single-threaded execution.
356+
357+
When grepping the object store (with `--cached` or giving tree objects), running
358+
with multiple threads might perform slower than single threaded if `--textconv`
359+
is given and there're too many text conversions. So if you experience low
360+
performance in this case, it might be desirable to use `--threads=1`.
361+
351362
GIT
352363
---
353364
Part of the linkgit:git[1] suite

builtin/grep.c

Lines changed: 46 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "submodule.h"
2525
#include "submodule-config.h"
2626
#include "object-store.h"
27+
#include "packfile.h"
2728

2829
static char const * const grep_usage[] = {
2930
N_("git grep [<options>] [-e] <pattern> [<rev>...] [[--] <path>...]"),
@@ -32,7 +33,6 @@ static char const * const grep_usage[] = {
3233

3334
static int recurse_submodules;
3435

35-
#define GREP_NUM_THREADS_DEFAULT 8
3636
static int num_threads;
3737

3838
static pthread_t *threads;
@@ -91,18 +91,18 @@ static pthread_cond_t cond_result;
9191

9292
static int skip_first_line;
9393

94-
static void add_work(struct grep_opt *opt, const struct grep_source *gs)
94+
static void add_work(struct grep_opt *opt, struct grep_source *gs)
9595
{
96+
if (opt->binary != GREP_BINARY_TEXT)
97+
grep_source_load_driver(gs, opt->repo->index);
98+
9699
grep_lock();
97100

98101
while ((todo_end+1) % ARRAY_SIZE(todo) == todo_done) {
99102
pthread_cond_wait(&cond_write, &grep_mutex);
100103
}
101104

102105
todo[todo_end].source = *gs;
103-
if (opt->binary != GREP_BINARY_TEXT)
104-
grep_source_load_driver(&todo[todo_end].source,
105-
opt->repo->index);
106106
todo[todo_end].done = 0;
107107
strbuf_reset(&todo[todo_end].out);
108108
todo_end = (todo_end + 1) % ARRAY_SIZE(todo);
@@ -200,12 +200,12 @@ static void start_threads(struct grep_opt *opt)
200200
int i;
201201

202202
pthread_mutex_init(&grep_mutex, NULL);
203-
pthread_mutex_init(&grep_read_mutex, NULL);
204203
pthread_mutex_init(&grep_attr_mutex, NULL);
205204
pthread_cond_init(&cond_add, NULL);
206205
pthread_cond_init(&cond_write, NULL);
207206
pthread_cond_init(&cond_result, NULL);
208207
grep_use_locks = 1;
208+
enable_obj_read_lock();
209209

210210
for (i = 0; i < ARRAY_SIZE(todo); i++) {
211211
strbuf_init(&todo[i].out, 0);
@@ -257,12 +257,12 @@ static int wait_all(void)
257257
free(threads);
258258

259259
pthread_mutex_destroy(&grep_mutex);
260-
pthread_mutex_destroy(&grep_read_mutex);
261260
pthread_mutex_destroy(&grep_attr_mutex);
262261
pthread_cond_destroy(&cond_add);
263262
pthread_cond_destroy(&cond_write);
264263
pthread_cond_destroy(&cond_result);
265264
grep_use_locks = 0;
265+
disable_obj_read_lock();
266266

267267
return hit;
268268
}
@@ -295,16 +295,6 @@ static int grep_cmd_config(const char *var, const char *value, void *cb)
295295
return st;
296296
}
297297

298-
static void *lock_and_read_oid_file(const struct object_id *oid, enum object_type *type, unsigned long *size)
299-
{
300-
void *data;
301-
302-
grep_read_lock();
303-
data = read_object_file(oid, type, size);
304-
grep_read_unlock();
305-
return data;
306-
}
307-
308298
static int grep_oid(struct grep_opt *opt, const struct object_id *oid,
309299
const char *filename, int tree_name_len,
310300
const char *path)
@@ -407,30 +397,28 @@ static int grep_submodule(struct grep_opt *opt,
407397
{
408398
struct repository subrepo;
409399
struct repository *superproject = opt->repo;
410-
const struct submodule *sub = submodule_from_path(superproject,
411-
&null_oid, path);
400+
const struct submodule *sub;
412401
struct grep_opt subopt;
413402
int hit;
414403

415-
/*
416-
* NEEDSWORK: submodules functions need to be protected because they
417-
* access the object store via config_from_gitmodules(): the latter
418-
* uses get_oid() which, for now, relies on the global the_repository
419-
* object.
420-
*/
421-
grep_read_lock();
404+
sub = submodule_from_path(superproject, &null_oid, path);
422405

423-
if (!is_submodule_active(superproject, path)) {
424-
grep_read_unlock();
406+
if (!is_submodule_active(superproject, path))
425407
return 0;
426-
}
427408

428-
if (repo_submodule_init(&subrepo, superproject, sub)) {
429-
grep_read_unlock();
409+
if (repo_submodule_init(&subrepo, superproject, sub))
430410
return 0;
431-
}
432411

433-
repo_read_gitmodules(&subrepo);
412+
/*
413+
* NEEDSWORK: repo_read_gitmodules() might call
414+
* add_to_alternates_memory() via config_from_gitmodules(). This
415+
* operation causes a race condition with concurrent object readings
416+
* performed by the worker threads. That's why we need obj_read_lock()
417+
* here. It should be removed once it's no longer necessary to add the
418+
* subrepo's odbs to the in-memory alternates list.
419+
*/
420+
obj_read_lock();
421+
repo_read_gitmodules(&subrepo, 0);
434422

435423
/*
436424
* NEEDSWORK: This adds the submodule's object directory to the list of
@@ -443,7 +431,7 @@ static int grep_submodule(struct grep_opt *opt,
443431
* object.
444432
*/
445433
add_to_alternates_memory(subrepo.objects->odb->path);
446-
grep_read_unlock();
434+
obj_read_unlock();
447435

448436
memcpy(&subopt, opt, sizeof(subopt));
449437
subopt.repo = &subrepo;
@@ -455,14 +443,12 @@ static int grep_submodule(struct grep_opt *opt,
455443
unsigned long size;
456444
struct strbuf base = STRBUF_INIT;
457445

446+
obj_read_lock();
458447
object = parse_object_or_die(oid, oid_to_hex(oid));
459-
460-
grep_read_lock();
448+
obj_read_unlock();
461449
data = read_object_with_reference(&subrepo,
462450
&object->oid, tree_type,
463451
&size, NULL);
464-
grep_read_unlock();
465-
466452
if (!data)
467453
die(_("unable to read tree (%s)"), oid_to_hex(&object->oid));
468454

@@ -587,7 +573,7 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
587573
void *data;
588574
unsigned long size;
589575

590-
data = lock_and_read_oid_file(&entry.oid, &type, &size);
576+
data = read_object_file(&entry.oid, &type, &size);
591577
if (!data)
592578
die(_("unable to read tree (%s)"),
593579
oid_to_hex(&entry.oid));
@@ -625,12 +611,9 @@ static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec,
625611
struct strbuf base;
626612
int hit, len;
627613

628-
grep_read_lock();
629614
data = read_object_with_reference(opt->repo,
630615
&obj->oid, tree_type,
631616
&size, NULL);
632-
grep_read_unlock();
633-
634617
if (!data)
635618
die(_("unable to read tree (%s)"), oid_to_hex(&obj->oid));
636619

@@ -659,13 +642,18 @@ static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec,
659642

660643
for (i = 0; i < nr; i++) {
661644
struct object *real_obj;
645+
646+
obj_read_lock();
662647
real_obj = deref_tag(opt->repo, list->objects[i].item,
663648
NULL, 0);
649+
obj_read_unlock();
664650

665651
/* load the gitmodules file for this rev */
666652
if (recurse_submodules) {
667653
submodule_free(opt->repo);
654+
obj_read_lock();
668655
gitmodules_config_oid(&real_obj->oid);
656+
obj_read_unlock();
669657
}
670658
if (grep_object(opt, pathspec, real_obj, list->objects[i].name,
671659
list->objects[i].path)) {
@@ -1065,7 +1053,10 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
10651053
pathspec.recursive = 1;
10661054
pathspec.recurse_submodules = !!recurse_submodules;
10671055

1068-
if (list.nr || cached || show_in_pager) {
1056+
if (recurse_submodules && untracked)
1057+
die(_("--untracked not supported with --recurse-submodules"));
1058+
1059+
if (show_in_pager) {
10691060
if (num_threads > 1)
10701061
warning(_("invalid option combination, ignoring --threads"));
10711062
num_threads = 1;
@@ -1075,7 +1066,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
10751066
} else if (num_threads < 0)
10761067
die(_("invalid number of threads specified (%d)"), num_threads);
10771068
else if (num_threads == 0)
1078-
num_threads = HAVE_THREADS ? GREP_NUM_THREADS_DEFAULT : 1;
1069+
num_threads = HAVE_THREADS ? online_cpus() : 1;
10791070

10801071
if (num_threads > 1) {
10811072
if (!HAVE_THREADS)
@@ -1084,6 +1075,17 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
10841075
&& (opt.pre_context || opt.post_context ||
10851076
opt.file_break || opt.funcbody))
10861077
skip_first_line = 1;
1078+
1079+
/*
1080+
* Pre-read gitmodules (if not read already) and force eager
1081+
* initialization of packed_git to prevent racy lazy
1082+
* reading/initialization once worker threads are started.
1083+
*/
1084+
if (recurse_submodules)
1085+
repo_read_gitmodules(the_repository, 1);
1086+
if (startup_info->have_repository)
1087+
(void)get_packed_git(the_repository);
1088+
10871089
start_threads(&opt);
10881090
} else {
10891091
/*
@@ -1118,9 +1120,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
11181120
}
11191121
}
11201122

1121-
if (recurse_submodules && untracked)
1122-
die(_("--untracked not supported with --recurse-submodules"));
1123-
11241123
if (!show_in_pager && !opt.status_only)
11251124
setup_pager();
11261125

grep.c

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1540,11 +1540,6 @@ static inline void grep_attr_unlock(void)
15401540
pthread_mutex_unlock(&grep_attr_mutex);
15411541
}
15421542

1543-
/*
1544-
* Same as git_attr_mutex, but protecting the thread-unsafe object db access.
1545-
*/
1546-
pthread_mutex_t grep_read_mutex;
1547-
15481543
static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
15491544
{
15501545
xdemitconf_t *xecfg = opt->priv;
@@ -1741,13 +1736,20 @@ static int fill_textconv_grep(struct repository *r,
17411736
}
17421737

17431738
/*
1744-
* fill_textconv is not remotely thread-safe; it may load objects
1745-
* behind the scenes, and it modifies the global diff tempfile
1746-
* structure.
1739+
* fill_textconv is not remotely thread-safe; it modifies the global
1740+
* diff tempfile structure, writes to the_repo's odb and might
1741+
* internally call thread-unsafe functions such as the
1742+
* prepare_packed_git() lazy-initializator. Because of the last two, we
1743+
* must ensure mutual exclusion between this call and the object reading
1744+
* API, thus we use obj_read_lock() here.
1745+
*
1746+
* TODO: allowing text conversion to run in parallel with object
1747+
* reading operations might increase performance in the multithreaded
1748+
* non-worktreee git-grep with --textconv.
17471749
*/
1748-
grep_read_lock();
1750+
obj_read_lock();
17491751
size = fill_textconv(r, driver, df, &buf);
1750-
grep_read_unlock();
1752+
obj_read_unlock();
17511753
free_filespec(df);
17521754

17531755
/*
@@ -1813,10 +1815,15 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
18131815
grep_source_load_driver(gs, opt->repo->index);
18141816
/*
18151817
* We might set up the shared textconv cache data here, which
1816-
* is not thread-safe.
1818+
* is not thread-safe. Also, get_oid_with_context() and
1819+
* parse_object() might be internally called. As they are not
1820+
* currenty thread-safe and might be racy with object reading,
1821+
* obj_read_lock() must be called.
18171822
*/
18181823
grep_attr_lock();
1824+
obj_read_lock();
18191825
textconv = userdiff_get_textconv(opt->repo, gs->driver);
1826+
obj_read_unlock();
18201827
grep_attr_unlock();
18211828
}
18221829

@@ -2116,10 +2123,7 @@ static int grep_source_load_oid(struct grep_source *gs)
21162123
{
21172124
enum object_type type;
21182125

2119-
grep_read_lock();
21202126
gs->buf = read_object_file(gs->identifier, &type, &gs->size);
2121-
grep_read_unlock();
2122-
21232127
if (!gs->buf)
21242128
return error(_("'%s': unable to read %s"),
21252129
gs->name,

grep.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -220,18 +220,5 @@ int grep_threads_ok(const struct grep_opt *opt);
220220
*/
221221
extern int grep_use_locks;
222222
extern pthread_mutex_t grep_attr_mutex;
223-
extern pthread_mutex_t grep_read_mutex;
224-
225-
static inline void grep_read_lock(void)
226-
{
227-
if (grep_use_locks)
228-
pthread_mutex_lock(&grep_read_mutex);
229-
}
230-
231-
static inline void grep_read_unlock(void)
232-
{
233-
if (grep_use_locks)
234-
pthread_mutex_unlock(&grep_read_mutex);
235-
}
236223

237224
#endif

0 commit comments

Comments
 (0)