diff --git a/.gitignore b/.gitignore index 4470d7cfc0ae72..5ccc3d00dd1a7b 100644 --- a/.gitignore +++ b/.gitignore @@ -156,6 +156,7 @@ /git-show-branch /git-show-index /git-show-ref +/git-sparse-checkout /git-stage /git-stash /git-status diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 75538d27e7e06b..9b8ab2a6d47c7b 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -591,8 +591,11 @@ core.multiPackIndex:: multi-pack-index design document]. core.sparseCheckout:: - Enable "sparse checkout" feature. See section "Sparse checkout" in - linkgit:git-read-tree[1] for more information. + Enable "sparse checkout" feature. If "false", then sparse-checkout + is disabled. If "true", then sparse-checkout is enabled with the full + .gitignore pattern set. If "cone", then sparse-checkout is enabled with + a restricted pattern set. See linkgit:git-sparse-checkout[1] for more + information. core.abbrev:: Set the length object names are abbreviated to. If diff --git a/Documentation/git-clone.txt b/Documentation/git-clone.txt index 5fc97f14de4deb..03299a8adba081 100644 --- a/Documentation/git-clone.txt +++ b/Documentation/git-clone.txt @@ -15,7 +15,7 @@ SYNOPSIS [--dissociate] [--separate-git-dir ] [--depth ] [--[no-]single-branch] [--no-tags] [--recurse-submodules[=]] [--[no-]shallow-submodules] - [--[no-]remote-submodules] [--jobs ] [--] + [--[no-]remote-submodules] [--jobs ] [--sparse] [--] [] DESCRIPTION @@ -156,6 +156,12 @@ objects from the source repository into a pack in the cloned repository. used, neither remote-tracking branches nor the related configuration variables are created. +--sparse:: + Initialize the sparse-checkout file so the working + directory starts with only the files in the root + of the repository. The sparse-checkout file can be + modified to grow the working directory as needed. + --mirror:: Set up a mirror of the source repository. This implies `--bare`. Compared to `--bare`, `--mirror` not only maps local branches of the diff --git a/Documentation/git-read-tree.txt b/Documentation/git-read-tree.txt index d2718426085613..da33f84f33d2c5 100644 --- a/Documentation/git-read-tree.txt +++ b/Documentation/git-read-tree.txt @@ -436,7 +436,7 @@ support. SEE ALSO -------- linkgit:git-write-tree[1]; linkgit:git-ls-files[1]; -linkgit:gitignore[5] +linkgit:gitignore[5]; linkgit:git-sparse-checkout[1]; GIT --- diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt new file mode 100644 index 00000000000000..a6711ac59075be --- /dev/null +++ b/Documentation/git-sparse-checkout.txt @@ -0,0 +1,148 @@ +git-sparse-checkout(1) +======================= + +NAME +---- +git-sparse-checkout - Initialize and modify the sparse-checkout +configuration, which reduces the checkout to a set of directories +given by a list of prefixes. + + +SYNOPSIS +-------- +[verse] +'git sparse-checkout [options]' + + +DESCRIPTION +----------- + +Initialize and modify the sparse-checkout configuration, which reduces +the checkout to a set of directories given by a list of prefixes. + + +COMMANDS +-------- +'list':: + Provide a list of the contents in the sparse-checkout file. + +'init':: + Enable the `core.sparseCheckout` setting. If the + sparse-checkout file does not exist, then populate it with + patterns that match every file in the root directory and + no other directories, then will remove all directories tracked + by Git. Add patterns to the sparse-checkout file to + repopulate the working directory. + +'set':: + Write a set of patterns to the sparse-checkout file, as given as + a list of arguments following the 'set' subcommand. Update the + working directory to match the new patterns. + +'disable':: + Remove the sparse-checkout file, set `core.sparseCheckout` to + `false`, and restore the working directory to include all files. + +SPARSE CHECKOUT +---------------- + +"Sparse checkout" allows populating the working directory sparsely. +It uses the skip-worktree bit (see linkgit:git-update-index[1]) to tell +Git whether a file in the working directory is worth looking at. If +the skip-worktree bit is set, then the file is ignored in the working +directory. Git will not populate the contents of those files, which +makes a sparse checkout helpful when working in a repository with many +files, but only a few are important to the current user. + +The `$GIT_DIR/info/sparse-checkout` file is used to define the +skip-worktree reference bitmap. When Git updates the working +directory, it resets the skip-worktree bit in the index based on this +file. If an entry +matches a pattern in this file, skip-worktree will not be set on +that entry. Otherwise, skip-worktree will be set. + +Then it compares the new skip-worktree value with the previous one. If +skip-worktree turns from set to unset, it will add the corresponding +file back. If it turns from unset to set, that file will be removed. + +To repopulate the working directory with all files, use the +`git sparse-checkout disable` command. + +Sparse checkout support in 'git checkout' and similar commands is +disabled by default. You need to set `core.sparseCheckout` to `true` +in order to have sparse checkout support. + +## FULL PATTERN SET + +By default, the sparse-checkout file uses the same syntax as `.gitignore` +files. + +While `$GIT_DIR/info/sparse-checkout` is usually used to specify what +files are included, you can also specify what files are _not_ included, +using negative patterns. For example, to remove the file `unwanted`: + +---------------- +/* +!unwanted +---------------- + + +## CONE PATTERN SET + +The full pattern set allows for arbitrary pattern matches and complicated +inclusion/exclusion rules. These can result in O(N*M) pattern matches when +updating the index, where N is the number of patterns and M is the number +of paths in the index. To combat this performance issue, a more restricted +pattern set is allowed when `core.spareCheckoutCone` is enabled. + +The accepted patterns in the cone pattern set are: + +1. *Recursive:* All paths inside a directory are included. + +2. *Parent:* All files immediately inside a directory are included. + +In addition to the above two patterns, we also expect that all files in the +root directory are included. If a recursive pattern is added, then all +leading directories are added as parent patterns. + +By default, when running `git sparse-checkout init`, the root directory is +added as a parent pattern. At this point, the sparse-checkout file contains +the following patterns: + +``` +/* +!/*/* +``` + +This says "include everything in root, but nothing two levels below root." +If we then add the folder `A/B/C` as a recursive pattern, the folders `A` and +`A/B` are added as parent patterns. The resulting sparse-checkout file is +now + +``` +/* +!/*/* +/A/* +!/A/*/* +/A/B/* +!/A/B/*/* +/A/B/C/* +``` + +Here, order matters, so the negative patterns are overridden by the positive +patterns that appear lower in the file. + +If `core.sparseCheckoutCone=true`, then Git will parse the sparse-checkout file +expecting patterns of these types. Git will warn if the patterns do not match. +If the patterns do match the expected format, then Git will use faster hash- +based algorithms to compute inclusion in the sparse-checkout. + +SEE ALSO +-------- + +linkgit:git-read-tree[1] +linkgit:gitignore[5] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Makefile b/Makefile index f58bf14c7bf3d9..f3322b75dd03f9 100644 --- a/Makefile +++ b/Makefile @@ -1121,6 +1121,7 @@ BUILTIN_OBJS += builtin/shortlog.o BUILTIN_OBJS += builtin/show-branch.o BUILTIN_OBJS += builtin/show-index.o BUILTIN_OBJS += builtin/show-ref.o +BUILTIN_OBJS += builtin/sparse-checkout.o BUILTIN_OBJS += builtin/stash.o BUILTIN_OBJS += builtin/stripspace.o BUILTIN_OBJS += builtin/submodule--helper.o diff --git a/builtin.h b/builtin.h index ec7e0954c4c8a1..d517068faaafa9 100644 --- a/builtin.h +++ b/builtin.h @@ -223,6 +223,7 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix); int cmd_show(int argc, const char **argv, const char *prefix); int cmd_show_branch(int argc, const char **argv, const char *prefix); int cmd_show_index(int argc, const char **argv, const char *prefix); +int cmd_sparse_checkout(int argc, const char **argv, const char *prefix); int cmd_status(int argc, const char **argv, const char *prefix); int cmd_stash(int argc, const char **argv, const char *prefix); int cmd_stripspace(int argc, const char **argv, const char *prefix); diff --git a/builtin/clone.c b/builtin/clone.c index a693e6ca44c8fd..16f4e8b6fd9a27 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -58,6 +58,7 @@ static const char *real_git_dir; static char *option_upload_pack = "git-upload-pack"; static int option_verbosity; static int option_progress = -1; +static int option_sparse_checkout; static enum transport_family family; static struct string_list option_config = STRING_LIST_INIT_NODUP; static struct string_list option_required_reference = STRING_LIST_INIT_NODUP; @@ -145,6 +146,8 @@ static struct option builtin_clone_options[] = { OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_BOOL(0, "remote-submodules", &option_remote_submodules, N_("any cloned submodules will use their remote-tracking branch")), + OPT_BOOL(0, "sparse", &option_sparse_checkout, + N_("initialize sparse-checkout file to include only files at root")), OPT_END() }; @@ -723,6 +726,27 @@ static void update_head(const struct ref *our, const struct ref *remote, } } +static int git_sparse_checkout_init(const char *repo) +{ + struct argv_array argv = ARGV_ARRAY_INIT; + int result = 0; + argv_array_pushl(&argv, "-C", repo, "sparse-checkout", "init", NULL); + + /* + * We must apply the setting in the current process + * for the later checkout to use the sparse-checkout file. + */ + core_apply_sparse_checkout = 1; + + if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { + error(_("failed to initialize sparse-checkout")); + result = 1; + } + + argv_array_clear(&argv); + return result; +} + static int checkout(int submodule_progress) { struct object_id oid; @@ -1096,6 +1120,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (option_required_reference.nr || option_optional_reference.nr) setup_reference(); + if (option_sparse_checkout && git_sparse_checkout_init(repo)) + return 1; + remote = remote_get(option_origin); strbuf_addf(&default_refspec, "+%s*:%s*", src_ref_prefix, diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c new file mode 100644 index 00000000000000..b6056a9c3cc77b --- /dev/null +++ b/builtin/sparse-checkout.c @@ -0,0 +1,420 @@ +#include "builtin.h" +#include "config.h" +#include "dir.h" +#include "parse-options.h" +#include "pathspec.h" +#include "repository.h" +#include "run-command.h" +#include "strbuf.h" +#include "string-list.h" + +static char const * const builtin_sparse_checkout_usage[] = { + N_("git sparse-checkout [init|list|set|disable] "), + NULL +}; + +static char *get_sparse_checkout_filename(void) +{ + return git_pathdup("info/sparse-checkout"); +} + +static void write_patterns_to_file(FILE *fp, struct pattern_list *pl) +{ + int i; + + for (i = 0; i < pl->nr; i++) { + struct path_pattern *p = pl->patterns[i]; + + if (p->flags & PATTERN_FLAG_NEGATIVE) + fprintf(fp, "!"); + + fprintf(fp, "%s", p->pattern); + + if (p->flags & PATTERN_FLAG_MUSTBEDIR) + fprintf(fp, "/"); + + fprintf(fp, "\n"); + } +} + +static int sparse_checkout_list(int argc, const char **argv) +{ + struct pattern_list pl; + char *sparse_filename; + int res; + + memset(&pl, 0, sizeof(pl)); + + sparse_filename = get_sparse_checkout_filename(); + res = add_patterns_from_file_to_list(sparse_filename, "", 0, &pl, NULL); + free(sparse_filename); + + if (res < 0) { + warning(_("this worktree is not sparse (sparse-checkout file may not exist)")); + return 0; + } + + write_patterns_to_file(stdout, &pl); + clear_pattern_list(&pl); + + return 0; +} + +static int update_working_directory(void) +{ + struct argv_array argv = ARGV_ARRAY_INIT; + int result = 0; + argv_array_pushl(&argv, "read-tree", "-m", "-u", "HEAD", NULL); + + if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { + error(_("failed to update index with new sparse-checkout paths")); + result = 1; + } + + argv_array_clear(&argv); + return result; +} + +#define SPARSE_CHECKOUT_NONE 0 +#define SPARSE_CHECKOUT_FULL 1 +#define SPARSE_CHECKOUT_CONE 2 + +static int sc_set_config(int mode) +{ + struct argv_array argv = ARGV_ARRAY_INIT; + struct argv_array cone_argv = ARGV_ARRAY_INIT; + + if (git_config_set_gently("extensions.worktreeConfig", "true")) { + error(_("failed to set extensions.worktreeConfig setting")); + return 1; + } + + argv_array_pushl(&argv, "config", "--worktree", "core.sparseCheckout", NULL); + + switch (mode) { + case SPARSE_CHECKOUT_FULL: + case SPARSE_CHECKOUT_CONE: + argv_array_pushl(&argv, "true", NULL); + break; + + case SPARSE_CHECKOUT_NONE: + argv_array_pushl(&argv, "false", NULL); + break; + + default: + die(_("invalid config mode")); + } + + if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { + error(_("failed to enable core.sparseCheckout")); + return 1; + } + + argv_array_pushl(&cone_argv, "config", "--worktree", + "core.sparseCheckoutCone", NULL); + + if (mode == SPARSE_CHECKOUT_CONE) + argv_array_push(&cone_argv, "true"); + else + argv_array_push(&cone_argv, "false"); + + if (run_command_v_opt(cone_argv.argv, RUN_GIT_CMD)) { + error(_("failed to enable core.sparseCheckoutCone")); + return 1; + } + + return 0; +} + +static char const * const builtin_sparse_checkout_init_usage[] = { + N_("git sparse-checkout init [--cone]"), + NULL +}; + +static struct sparse_checkout_init_opts { + int cone_mode; +} init_opts; + +static int sparse_checkout_init(int argc, const char **argv) +{ + struct pattern_list pl; + char *sparse_filename; + FILE *fp; + int res; + struct object_id oid; + int mode; + + static struct option builtin_sparse_checkout_init_options[] = { + OPT_BOOL(0, "cone", &init_opts.cone_mode, + N_("initialize the sparse-checkout in cone mode")), + OPT_END(), + }; + + argc = parse_options(argc, argv, NULL, + builtin_sparse_checkout_init_options, + builtin_sparse_checkout_init_usage, 0); + + mode = init_opts.cone_mode ? SPARSE_CHECKOUT_CONE : SPARSE_CHECKOUT_FULL; + + if (sc_set_config(mode)) + return 1; + + memset(&pl, 0, sizeof(pl)); + + sparse_filename = get_sparse_checkout_filename(); + res = add_patterns_from_file_to_list(sparse_filename, "", 0, &pl, NULL); + + /* If we already have a sparse-checkout file, use it. */ + if (res >= 0) { + free(sparse_filename); + goto reset_dir; + } + + /* initial mode: all blobs at root */ + fp = fopen(sparse_filename, "w"); + free(sparse_filename); + fprintf(fp, "/*\n!/*/\n"); + fclose(fp); + + if (get_oid("HEAD", &oid)) { + /* assume we are in a fresh repo */ + return 0; + } + +reset_dir: + return update_working_directory(); +} + +static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *path) +{ + struct pattern_entry *e = xmalloc(sizeof(struct pattern_entry)); + e->patternlen = path->len; + e->pattern = strbuf_detach(path, NULL); + hashmap_entry_init(e, memhash(e->pattern, e->patternlen)); + + hashmap_add(&pl->recursive_hashmap, e); + + while (e->patternlen) { + char *slash = strrchr(e->pattern, '/'); + char *oldpattern = e->pattern; + size_t newlen; + + if (!slash) + break; + + newlen = slash - e->pattern; + e = xmalloc(sizeof(struct pattern_entry)); + e->patternlen = newlen; + e->pattern = xstrndup(oldpattern, newlen); + hashmap_entry_init(e, memhash(e->pattern, e->patternlen)); + + if (!hashmap_get(&pl->parent_hashmap, e, NULL)) + hashmap_add(&pl->parent_hashmap, e); + } +} + +static void write_cone_to_file(FILE *fp, struct pattern_list *pl) +{ + int i; + struct pattern_entry *entry; + struct hashmap_iter iter; + struct string_list sl = STRING_LIST_INIT_DUP; + + hashmap_iter_init(&pl->parent_hashmap, &iter); + while ((entry = hashmap_iter_next(&iter))) { + char *pattern = xstrdup(entry->pattern); + char *converted = pattern; + if (pattern[0] == '/') + converted++; + if (pattern[entry->patternlen - 1] == '/') + pattern[entry->patternlen - 1] = 0; + string_list_insert(&sl, converted); + free(pattern); + } + + string_list_sort(&sl); + string_list_remove_duplicates(&sl, 0); + + fprintf(fp, "/*\n!/*/\n"); + + for (i = 0; i < sl.nr; i++) { + char *pattern = sl.items[i].string; + + if (strlen(pattern)) + fprintf(fp, "/%s/\n!/%s/*/\n", pattern, pattern); + } + + string_list_clear(&sl, 0); + + hashmap_iter_init(&pl->recursive_hashmap, &iter); + while ((entry = hashmap_iter_next(&iter))) { + char *pattern = xstrdup(entry->pattern); + char *converted = pattern; + if (pattern[0] == '/') + converted++; + if (pattern[entry->patternlen - 1] == '/') + pattern[entry->patternlen - 1] = 0; + string_list_insert(&sl, converted); + free(pattern); + } + + string_list_sort(&sl); + string_list_remove_duplicates(&sl, 0); + + for (i = 0; i < sl.nr; i++) { + char *pattern = sl.items[i].string; + fprintf(fp, "/%s/\n", pattern); + } +} + +static int write_patterns_and_update(struct pattern_list *pl) +{ + char *sparse_filename; + FILE *fp; + + sparse_filename = get_sparse_checkout_filename(); + fp = fopen(sparse_filename, "w"); + + if (core_sparse_checkout_cone) + write_cone_to_file(fp, pl); + else + write_patterns_to_file(fp, pl); + + fclose(fp); + free(sparse_filename); + + clear_pattern_list(pl); + return update_working_directory(); +} + +static void strbuf_to_cone_pattern(struct strbuf *line, struct pattern_list *pl) +{ + strbuf_trim(line); + + strbuf_trim_trailing_dir_sep(line); + + if (!line->len) + return; + + if (line->buf[0] == '/') + strbuf_remove(line, 0, 1); + + if (!line->len) + return; + + insert_recursive_pattern(pl, line); +} + +static char const * const builtin_sparse_checkout_set_usage[] = { + N_("git sparse-checkout set [--stdin|]"), + NULL +}; + +static struct sparse_checkout_set_opts { + int use_stdin; +} set_opts; + +static int sparse_checkout_set(int argc, const char **argv, const char *prefix) +{ + int i; + struct pattern_list pl; + + static struct option builtin_sparse_checkout_set_options[] = { + OPT_BOOL(0, "stdin", &set_opts.use_stdin, + N_("read patterns from standard in")), + OPT_END(), + }; + + memset(&pl, 0, sizeof(pl)); + + argc = parse_options(argc, argv, prefix, + builtin_sparse_checkout_set_options, + builtin_sparse_checkout_set_usage, + PARSE_OPT_KEEP_UNKNOWN); + + if (core_sparse_checkout_cone) { + struct strbuf line = STRBUF_INIT; + hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0); + hashmap_init(&pl.parent_hashmap, pl_hashmap_cmp, NULL, 0); + + if (set_opts.use_stdin) { + while (!strbuf_getline(&line, stdin)) + strbuf_to_cone_pattern(&line, &pl); + } else { + for (i = 0; i < argc; i++) { + strbuf_setlen(&line, 0); + strbuf_addstr(&line, argv[i]); + strbuf_to_cone_pattern(&line, &pl); + } + } + } else { + if (set_opts.use_stdin) { + struct strbuf line = STRBUF_INIT; + + while (!strbuf_getline(&line, stdin)) { + size_t len; + char *buf = strbuf_detach(&line, &len); + add_pattern(buf, buf, len, &pl, 0); + } + } else { + for (i = 0; i < argc; i++) + add_pattern(argv[i], argv[i], strlen(argv[i]), &pl, 0); + } + } + + return write_patterns_and_update(&pl); +} + +static int sparse_checkout_disable(int argc, const char **argv) +{ + char *sparse_filename; + FILE *fp; + + if (sc_set_config(SPARSE_CHECKOUT_FULL)) + die(_("failed to change config")); + + sparse_filename = get_sparse_checkout_filename(); + fp = fopen(sparse_filename, "w"); + fprintf(fp, "/*\n"); + fclose(fp); + + if (update_working_directory()) + die(_("error while refreshing working directory")); + + unlink(sparse_filename); + free(sparse_filename); + + return sc_set_config(SPARSE_CHECKOUT_NONE); +} + +int cmd_sparse_checkout(int argc, const char **argv, const char *prefix) +{ + static struct option builtin_sparse_checkout_options[] = { + OPT_END(), + }; + + if (argc == 2 && !strcmp(argv[1], "-h")) + usage_with_options(builtin_sparse_checkout_usage, + builtin_sparse_checkout_options); + + argc = parse_options(argc, argv, prefix, + builtin_sparse_checkout_options, + builtin_sparse_checkout_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + + git_config(git_default_config, NULL); + + if (argc > 0) { + if (!strcmp(argv[0], "list")) + return sparse_checkout_list(argc, argv); + if (!strcmp(argv[0], "init")) + return sparse_checkout_init(argc, argv); + if (!strcmp(argv[0], "set")) + return sparse_checkout_set(argc, argv, prefix); + if (!strcmp(argv[0], "disable")) + return sparse_checkout_disable(argc, argv); + } + + usage_with_options(builtin_sparse_checkout_usage, + builtin_sparse_checkout_options); +} diff --git a/cache.h b/cache.h index cf5d70c19622c6..8e8ea67efa250e 100644 --- a/cache.h +++ b/cache.h @@ -911,12 +911,14 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; -extern int core_apply_sparse_checkout; extern int precomposed_unicode; extern int protect_hfs; extern int protect_ntfs; extern const char *core_fsmonitor; +int core_apply_sparse_checkout; +int core_sparse_checkout_cone; + /* * Include broken refs in all ref iterations, which will * generally choke dangerous operations rather than letting diff --git a/config.c b/config.c index 296a6d9cc4110b..f65c74f5b7db91 100644 --- a/config.c +++ b/config.c @@ -1329,6 +1329,11 @@ static int git_default_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.sparsecheckoutcone")) { + core_sparse_checkout_cone = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.precomposeunicode")) { precomposed_unicode = git_config_bool(var, value); return 0; diff --git a/dir.c b/dir.c index 34972abdaf1d5b..f6953dd29a7fcd 100644 --- a/dir.c +++ b/dir.c @@ -599,6 +599,107 @@ void parse_path_pattern(const char **pattern, *patternlen = len; } +int pl_hashmap_cmp(const void *unused_cmp_data, + const void *a, const void *b, const void *key) +{ + const struct pattern_entry *ee1 = (const struct pattern_entry *)a; + const struct pattern_entry *ee2 = (const struct pattern_entry *)b; + + size_t min_len = ee1->patternlen <= ee2->patternlen + ? ee1->patternlen + : ee2->patternlen; + + return strncmp(ee1->pattern, ee2->pattern, min_len); +} + +static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given) +{ + struct pattern_entry *translated; + char *truncated; + char *data = NULL; + + if (!pl->use_cone_patterns) + return; + + if (given->patternlen > 2 && + !strcmp(given->pattern + given->patternlen - 2, "/*")) { + if (!(given->flags & PATTERN_FLAG_NEGATIVE)) { + /* Not a cone pattern. */ + pl->use_cone_patterns = 0; + warning(_("unrecognized pattern: '%s'"), given->pattern); + goto clear_hashmaps; + } + + truncated = xstrdup(given->pattern); + truncated[given->patternlen - 3] = 0; + + translated = xmalloc(sizeof(struct pattern_entry)); + translated->pattern = truncated; + translated->patternlen = given->patternlen - 3; + hashmap_entry_init(translated, + memhash(translated->pattern, translated->patternlen)); + + if (!hashmap_get(&pl->recursive_hashmap, translated, NULL)) { + /* We did not see the "parent" included */ + warning(_("unrecognized negative pattern: '%s'"), + given->pattern); + free(truncated); + free(translated); + goto clear_hashmaps; + } + + hashmap_add(&pl->parent_hashmap, translated); + hashmap_remove(&pl->recursive_hashmap, translated, &data); + free(data); + return; + } + + if (given->flags & PATTERN_FLAG_NEGATIVE) { + warning(_("unrecognized negative pattern: '%s'"), + given->pattern); + goto clear_hashmaps; + } + + translated = xmalloc(sizeof(struct pattern_entry)); + + truncated = xstrdup(given->pattern); + truncated[given->patternlen - 1] = 0; + translated->pattern = truncated; + translated->patternlen = given->patternlen - 1; + hashmap_entry_init(translated, + memhash(translated->pattern, translated->patternlen)); + + hashmap_add(&pl->recursive_hashmap, translated); + + if (hashmap_get(&pl->parent_hashmap, translated, NULL)) { + /* we already included this at the parent level */ + warning(_("your sparse-checkout file may have issues: pattern '%s' is repeated"), + given->pattern); + hashmap_remove(&pl->parent_hashmap, translated, &data); + free(data); + free(translated); + } + + return; + +clear_hashmaps: + hashmap_free(&pl->parent_hashmap, 1); + hashmap_free(&pl->recursive_hashmap, 1); + pl->use_cone_patterns = 0; +} + +static int hashmap_contains_path(struct hashmap *map, + struct strbuf *pattern) +{ + struct pattern_entry p; + + /* Check straight mapping */ + p.pattern = pattern->buf; + p.patternlen = pattern->len; + hashmap_entry_init(&p, memhash(p.pattern, p.patternlen)); + return !!hashmap_get(map, &p, NULL); +} + void add_pattern(const char *string, const char *base, int baselen, struct pattern_list *pl, int srcpos) { @@ -623,6 +724,8 @@ void add_pattern(const char *string, const char *base, ALLOC_GROW(pl->patterns, pl->nr + 1, pl->alloc); pl->patterns[pl->nr++] = pattern; pattern->pl = pl; + + add_pattern_to_hashsets(pl, pattern); } static int read_skip_worktree_file_from_index(const struct index_state *istate, @@ -848,6 +951,10 @@ static int add_patterns_from_buffer(char *buf, size_t size, int i, lineno = 1; char *entry; + pl->use_cone_patterns = core_sparse_checkout_cone; + hashmap_init(&pl->recursive_hashmap, pl_hashmap_cmp, NULL, 0); + hashmap_init(&pl->parent_hashmap, pl_hashmap_cmp, NULL, 0); + pl->filebuf = buf; if (skip_utf8_bom(&buf, size)) @@ -1084,16 +1191,64 @@ enum pattern_match_result path_matches_pattern_list( struct index_state *istate) { struct path_pattern *pattern; - pattern = last_matching_pattern_from_list(pathname, pathlen, basename, - dtype, pl, istate); - if (pattern) { - if (pattern->flags & PATTERN_FLAG_NEGATIVE) - return NOT_MATCHED; - else - return MATCHED; + struct strbuf parent_pathname = STRBUF_INIT; + int result = NOT_MATCHED; + const char *slash_pos; + + if (!pl->use_cone_patterns) { + pattern = last_matching_pattern_from_list(pathname, pathlen, basename, + dtype, pl, istate); + if (pattern) { + if (pattern->flags & PATTERN_FLAG_NEGATIVE) + return NOT_MATCHED; + else + return MATCHED; + } + + return UNDECIDED; + } + + strbuf_addch(&parent_pathname, '/'); + strbuf_add(&parent_pathname, pathname, pathlen); + + if (hashmap_contains_path(&pl->recursive_hashmap, + &parent_pathname)) { + result = MATCHED_RECURSIVE; + goto done; + } + + slash_pos = strrchr(parent_pathname.buf, '/'); + + if (slash_pos == parent_pathname.buf) { + /* include every file in root */ + result = MATCHED; + goto done; + } + + strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf); + + if (hashmap_contains_path(&pl->parent_hashmap, &parent_pathname)) { + result = MATCHED; + goto done; } - return UNDECIDED; + while (parent_pathname.len) { + if (hashmap_contains_path(&pl->recursive_hashmap, + &parent_pathname)) { + result = MATCHED_RECURSIVE; + goto done; + } + + slash_pos = strrchr(parent_pathname.buf, '/'); + if (slash_pos == parent_pathname.buf) + break; + + strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf); + } + +done: + strbuf_release(&parent_pathname); + return result; } static struct path_pattern *last_matching_pattern_from_lists( diff --git a/dir.h b/dir.h index 608696c9583100..5f410eedbb215c 100644 --- a/dir.h +++ b/dir.h @@ -4,6 +4,7 @@ /* See Documentation/technical/api-directory-listing.txt */ #include "cache.h" +#include "hashmap.h" #include "strbuf.h" struct dir_entry { @@ -37,6 +38,13 @@ struct path_pattern { int srcpos; }; +/* used for hashmaps for cone patterns */ +struct pattern_entry { + struct hashmap_entry ent; + char *pattern; + size_t patternlen; +}; + /* * Each excludes file will be parsed into a fresh exclude_list which * is appended to the relevant exclude_list_group (either EXC_DIRS or @@ -55,6 +63,25 @@ struct pattern_list { const char *src; struct path_pattern **patterns; + + /* + * While scanning the excludes, we attempt to match the patterns + * with a more restricted set that allows us to use hashsets for + * matching logic, which is faster than the linear lookup in the + * excludes array above. If non-zero, that check succeeded. + */ + unsigned use_cone_patterns; + + /* + * Stores paths where everything starting with those paths + * is included. + */ + struct hashmap recursive_hashmap; + + /* + * Used to check single-level parents of blobs. + */ + struct hashmap parent_hashmap; }; /* @@ -234,6 +261,7 @@ enum pattern_match_result { UNDECIDED = -1, NOT_MATCHED = 0, MATCHED = 1, + MATCHED_RECURSIVE = 2, }; /* @@ -269,6 +297,9 @@ int is_excluded(struct dir_struct *dir, struct index_state *istate, const char *name, int *dtype); +int pl_hashmap_cmp(const void *unused_cmp_data, + const void *a, const void *b, const void *key); + struct pattern_list *add_pattern_list(struct dir_struct *dir, int group_type, const char *src); int add_patterns_from_file_to_list(const char *fname, const char *base, int baselen, diff --git a/environment.c b/environment.c index 89af47cb850490..670d92bcc00db0 100644 --- a/environment.c +++ b/environment.c @@ -69,6 +69,7 @@ enum object_creation_mode object_creation_mode = OBJECT_CREATION_MODE; char *notes_ref_name; int grafts_replace_parents = 1; int core_apply_sparse_checkout; +int core_sparse_checkout_cone; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; diff --git a/git.c b/git.c index c2eec470c956c5..e775fbad421a5e 100644 --- a/git.c +++ b/git.c @@ -576,6 +576,7 @@ static struct cmd_struct commands[] = { { "show-branch", cmd_show_branch, RUN_SETUP }, { "show-index", cmd_show_index }, { "show-ref", cmd_show_ref, RUN_SETUP }, + { "sparse-checkout", cmd_sparse_checkout, RUN_SETUP | NEED_WORK_TREE }, { "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE }, /* * NEEDSWORK: Until the builtin stash is thoroughly robust and no diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh new file mode 100755 index 00000000000000..f06795fdba529a --- /dev/null +++ b/t/t1091-sparse-checkout-builtin.sh @@ -0,0 +1,212 @@ +#!/bin/sh + +test_description='sparse checkout builtin tests' + +. ./test-lib.sh + +test_expect_success 'setup' ' + git init repo && + ( + cd repo && + echo "initial" >a && + mkdir folder1 folder2 deep && + mkdir deep/deeper1 deep/deeper2 && + mkdir deep/deeper1/deepest && + cp a folder1 && + cp a folder2 && + cp a deep && + cp a deep/deeper1 && + cp a deep/deeper2 && + cp a deep/deeper1/deepest && + git add . && + git commit -m "initial commit" + ) +' + +test_expect_success 'git sparse-checkout list (empty)' ' + git -C repo sparse-checkout list >list 2>err && + test_line_count = 0 list && + test_i18ngrep "this worktree is not sparse (sparse-checkout file may not exist)" err +' + +test_expect_success 'git sparse-checkout list (populated)' ' + test_when_finished rm -f repo/.git/info/sparse-checkout && + cat >expect <<-EOF && + /folder1/ + /deep/ + **/a + !*bin* + EOF + cat expect >repo/.git/info/sparse-checkout && + git -C repo sparse-checkout list >list && + test_cmp expect list +' + +test_expect_success 'git sparse-checkout init' ' + git -C repo sparse-checkout init && + cat >expect <<-EOF && + /* + !/*/ + EOF + test_cmp expect repo/.git/info/sparse-checkout && + git -C repo config --list >config && + test_i18ngrep "core.sparsecheckout=true" config && + ls repo >dir && + echo a >expect && + test_cmp expect dir +' + +test_expect_success 'git sparse-checkout list after init' ' + git -C repo sparse-checkout list >actual && + cat >expect <<-EOF && + /* + !/*/ + EOF + test_cmp expect actual +' + +test_expect_success 'init with existing sparse-checkout' ' + echo "/folder1/" >> repo/.git/info/sparse-checkout && + git -C repo sparse-checkout init && + cat >expect <<-EOF && + /* + !/*/ + /folder1/ + EOF + test_cmp expect repo/.git/info/sparse-checkout && + ls repo >dir && + cat >expect <<-EOF && + a + folder1 + EOF + test_cmp expect dir +' + +test_expect_success 'clone --sparse' ' + git clone --sparse repo clone && + git -C clone sparse-checkout list >actual && + cat >expect <<-EOF && + /* + !/*/ + EOF + test_cmp expect actual && + ls clone >dir && + echo a >expect && + test_cmp expect dir +' + +test_expect_success 'set sparse-checkout using builtin' ' + git -C repo sparse-checkout set "/*" "!/*/" "/folder1/" "/folder2/" && + cat >expect <<-EOF && + /* + !/*/ + /folder1/ + /folder2/ + EOF + git -C repo sparse-checkout list >actual && + test_cmp expect actual && + test_cmp expect repo/.git/info/sparse-checkout && + ls repo >dir && + cat >expect <<-EOF && + a + folder1 + folder2 + EOF + test_cmp expect dir +' + +test_expect_success 'set sparse-checkout using --stdin' ' + cat >expect <<-EOF && + /* + !/*/ + /folder1/ + /folder2/ + EOF + git -C repo sparse-checkout set --stdin actual && + test_cmp expect actual && + test_cmp expect repo/.git/info/sparse-checkout && + ls repo >dir && + cat >expect <<-EOF && + a + folder1 + folder2 + EOF + test_cmp expect dir +' + +test_expect_success 'cone mode: match patterns' ' + git -C repo config --worktree core.sparseCheckoutCone true && + rm -rf repo/a repo/folder1 repo/folder2 && + git -C repo read-tree -mu HEAD && + git -C repo reset --hard && + ls repo >dir && + cat >expect <<-EOF && + a + folder1 + folder2 + EOF + test_cmp expect dir +' + +test_expect_success 'cone mode: warn on bad pattern' ' + test_when_finished mv sparse-checkout repo/.git/info/ && + cp repo/.git/info/sparse-checkout . && + echo "!/deep/deeper/" >>repo/.git/info/sparse-checkout && + git -C repo read-tree -mu HEAD 2>err && + test_i18ngrep "unrecognized negative pattern" err +' + +test_expect_success 'sparse-checkout disable' ' + git -C repo sparse-checkout disable && + test_path_is_missing repo/.git/info/sparse-checkout && + git -C repo config --list >config && + test_i18ngrep "core.sparsecheckout=false" config && + ls repo >dir && + cat >expect <<-EOF && + a + deep + folder1 + folder2 + EOF + test_cmp expect dir +' + +test_expect_success 'cone mode: init and set' ' + git -C repo sparse-checkout init --cone && + git -C repo config --list >config && + test_i18ngrep "core.sparsecheckoutcone=true" config && + ls repo >dir && + echo a >expect && + test_cmp expect dir && + git -C repo sparse-checkout set deep/deeper1/deepest && + ls repo >dir && + cat >expect <<-EOF && + a + deep + EOF + ls repo/deep >dir && + cat >expect <<-EOF && + a + deeper1 + EOF + ls repo/deep/deeper1 >dir && + cat >expect <<-EOF && + a + deepest + EOF + test_cmp expect dir && + cat >expect <<-EOF && + /* + !/*/ + /deep/ + !/deep/*/ + /deep/deeper1/ + !/deep/deeper1/*/ + /deep/deeper1/deepest/ + EOF + test_cmp expect repo/.git/info/sparse-checkout +' + +test_done + diff --git a/unpack-trees.c b/unpack-trees.c index cd548f4fa2d944..43acc0ffd64369 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1280,15 +1280,17 @@ static int clear_ce_flags_dir(struct index_state *istate, struct cache_entry **cache_end; int dtype = DT_DIR; int rc; - enum pattern_match_result ret; - ret = path_matches_pattern_list(prefix->buf, prefix->len, - basename, &dtype, pl, istate); + enum pattern_match_result ret, orig_ret; + orig_ret = path_matches_pattern_list(prefix->buf, prefix->len, + basename, &dtype, pl, istate); strbuf_addch(prefix, '/'); /* If undecided, use matching result of parent dir in defval */ - if (ret == UNDECIDED) + if (orig_ret == UNDECIDED) ret = default_match; + else + ret = orig_ret; for (cache_end = cache; cache_end != cache + nr; cache_end++) { struct cache_entry *ce = *cache_end; @@ -1296,17 +1298,23 @@ static int clear_ce_flags_dir(struct index_state *istate, break; } - /* - * TODO: check pl, if there are no patterns that may conflict - * with ret (iow, we know in advance the incl/excl - * decision for the entire directory), clear flag here without - * calling clear_ce_flags_1(). That function will call - * the expensive path_matches_pattern_list() on every entry. - */ - rc = clear_ce_flags_1(istate, cache, cache_end - cache, - prefix, - select_mask, clear_mask, - pl, ret); + if (pl->use_cone_patterns && orig_ret == MATCHED_RECURSIVE) { + struct cache_entry **ce = cache; + rc = (cache_end - cache) / sizeof(struct cache_entry *); + + while (ce < cache_end) { + (*ce)->ce_flags &= ~clear_mask; + ce++; + } + } else if (pl->use_cone_patterns && orig_ret == NOT_MATCHED) { + rc = (cache_end - cache) / sizeof(struct cache_entry *); + } else { + rc = clear_ce_flags_1(istate, cache, cache_end - cache, + prefix, + select_mask, clear_mask, + pl, ret); + } + strbuf_setlen(prefix, prefix->len - 1); return rc; } @@ -1404,15 +1412,23 @@ static int clear_ce_flags(struct index_state *istate, struct pattern_list *pl) { static struct strbuf prefix = STRBUF_INIT; + char label[100]; + int rval; strbuf_reset(&prefix); - return clear_ce_flags_1(istate, + xsnprintf(label, sizeof(label), "clear_ce_flags(0x%08lx,0x%08lx)", + (unsigned long)select_mask, (unsigned long)clear_mask); + trace2_region_enter("unpack_trees", label, the_repository); + rval = clear_ce_flags_1(istate, istate->cache, istate->cache_nr, &prefix, select_mask, clear_mask, pl, 0); + trace2_region_leave("unpack_trees", label, the_repository); + + return rval; } /*