|
| 1 | +/* |
| 2 | + * path-walk.c: implementation for path-based walks of the object graph. |
| 3 | + */ |
| 4 | +#include "git-compat-util.h" |
| 5 | +#include "path-walk.h" |
| 6 | +#include "blob.h" |
| 7 | +#include "commit.h" |
| 8 | +#include "dir.h" |
| 9 | +#include "hashmap.h" |
| 10 | +#include "hex.h" |
| 11 | +#include "object.h" |
| 12 | +#include "oid-array.h" |
| 13 | +#include "revision.h" |
| 14 | +#include "string-list.h" |
| 15 | +#include "strmap.h" |
| 16 | +#include "trace2.h" |
| 17 | +#include "tree.h" |
| 18 | +#include "tree-walk.h" |
| 19 | + |
| 20 | +struct type_and_oid_list |
| 21 | +{ |
| 22 | + enum object_type type; |
| 23 | + struct oid_array oids; |
| 24 | +}; |
| 25 | + |
/* Static initializer for an empty 'struct type_and_oid_list'. */
#define TYPE_AND_OID_LIST_INIT { \
	.oids = OID_ARRAY_INIT, \
	.type = OBJ_NONE, \
}
| 30 | + |
| 31 | +struct path_walk_context { |
| 32 | + /** |
| 33 | + * Repeats of data in 'struct path_walk_info' for |
| 34 | + * access with fewer characters. |
| 35 | + */ |
| 36 | + struct repository *repo; |
| 37 | + struct rev_info *revs; |
| 38 | + struct path_walk_info *info; |
| 39 | + |
| 40 | + /** |
| 41 | + * Map a path to a 'struct type_and_oid_list' |
| 42 | + * containing the objects discovered at that |
| 43 | + * path. |
| 44 | + */ |
| 45 | + struct strmap paths_to_lists; |
| 46 | + |
| 47 | + /** |
| 48 | + * Store the current list of paths in a stack, to |
| 49 | + * facilitate depth-first-search without recursion. |
| 50 | + */ |
| 51 | + struct string_list path_stack; |
| 52 | +}; |
| 53 | + |
| 54 | +static int add_children(struct path_walk_context *ctx, |
| 55 | + const char *base_path, |
| 56 | + struct object_id *oid) |
| 57 | +{ |
| 58 | + struct tree_desc desc; |
| 59 | + struct name_entry entry; |
| 60 | + struct strbuf path = STRBUF_INIT; |
| 61 | + size_t base_len; |
| 62 | + struct tree *tree = lookup_tree(ctx->repo, oid); |
| 63 | + |
| 64 | + if (!tree) { |
| 65 | + error(_("failed to walk children of tree %s: not found"), |
| 66 | + oid_to_hex(oid)); |
| 67 | + return -1; |
| 68 | + } else if (parse_tree_gently(tree, 1)) { |
| 69 | + die("bad tree object %s", oid_to_hex(oid)); |
| 70 | + } |
| 71 | + |
| 72 | + strbuf_addstr(&path, base_path); |
| 73 | + base_len = path.len; |
| 74 | + |
| 75 | + parse_tree(tree); |
| 76 | + init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size); |
| 77 | + while (tree_entry(&desc, &entry)) { |
| 78 | + struct type_and_oid_list *list; |
| 79 | + struct object *o; |
| 80 | + /* Not actually true, but we will ignore submodules later. */ |
| 81 | + enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB; |
| 82 | + |
| 83 | + /* Skip submodules. */ |
| 84 | + if (S_ISGITLINK(entry.mode)) |
| 85 | + continue; |
| 86 | + |
| 87 | + if (type == OBJ_TREE) { |
| 88 | + struct tree *child = lookup_tree(ctx->repo, &entry.oid); |
| 89 | + o = child ? &child->object : NULL; |
| 90 | + } else if (type == OBJ_BLOB) { |
| 91 | + struct blob *child = lookup_blob(ctx->repo, &entry.oid); |
| 92 | + o = child ? &child->object : NULL; |
| 93 | + } else { |
| 94 | + /* Wrong type? */ |
| 95 | + continue; |
| 96 | + } |
| 97 | + |
| 98 | + if (!o) /* report error?*/ |
| 99 | + continue; |
| 100 | + |
| 101 | + /* Skip this object if already seen. */ |
| 102 | + if (o->flags & SEEN) |
| 103 | + continue; |
| 104 | + o->flags |= SEEN; |
| 105 | + |
| 106 | + strbuf_setlen(&path, base_len); |
| 107 | + strbuf_add(&path, entry.path, entry.pathlen); |
| 108 | + |
| 109 | + /* |
| 110 | + * Trees will end with "/" for concatenation and distinction |
| 111 | + * from blobs at the same path. |
| 112 | + */ |
| 113 | + if (type == OBJ_TREE) |
| 114 | + strbuf_addch(&path, '/'); |
| 115 | + |
| 116 | + if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) { |
| 117 | + CALLOC_ARRAY(list, 1); |
| 118 | + list->type = type; |
| 119 | + strmap_put(&ctx->paths_to_lists, path.buf, list); |
| 120 | + string_list_append(&ctx->path_stack, path.buf); |
| 121 | + } |
| 122 | + oid_array_append(&list->oids, &entry.oid); |
| 123 | + } |
| 124 | + |
| 125 | + free_tree_buffer(tree); |
| 126 | + strbuf_release(&path); |
| 127 | + return 0; |
| 128 | +} |
| 129 | + |
| 130 | +/* |
| 131 | + * For each path in paths_to_explore, walk the trees another level |
| 132 | + * and add any found blobs to the batch (but only if they exist and |
| 133 | + * haven't been added yet). |
| 134 | + */ |
| 135 | +static int walk_path(struct path_walk_context *ctx, |
| 136 | + const char *path) |
| 137 | +{ |
| 138 | + struct type_and_oid_list *list; |
| 139 | + int ret = 0; |
| 140 | + |
| 141 | + list = strmap_get(&ctx->paths_to_lists, path); |
| 142 | + |
| 143 | + /* Evaluate function pointer on this data. */ |
| 144 | + ret = ctx->info->path_fn(path, &list->oids, list->type, |
| 145 | + ctx->info->path_fn_data); |
| 146 | + |
| 147 | + /* Expand data for children. */ |
| 148 | + if (list->type == OBJ_TREE) { |
| 149 | + for (size_t i = 0; i < list->oids.nr; i++) { |
| 150 | + ret |= add_children(ctx, |
| 151 | + path, |
| 152 | + &list->oids.oid[i]); |
| 153 | + } |
| 154 | + } |
| 155 | + |
| 156 | + oid_array_clear(&list->oids); |
| 157 | + strmap_remove(&ctx->paths_to_lists, path, 1); |
| 158 | + return ret; |
| 159 | +} |
| 160 | + |
| 161 | +static void clear_strmap(struct strmap *map) |
| 162 | +{ |
| 163 | + struct hashmap_iter iter; |
| 164 | + struct strmap_entry *e; |
| 165 | + |
| 166 | + hashmap_for_each_entry(&map->map, &iter, e, ent) { |
| 167 | + struct type_and_oid_list *list = e->value; |
| 168 | + oid_array_clear(&list->oids); |
| 169 | + } |
| 170 | + strmap_clear(map, 1); |
| 171 | + strmap_init(map); |
| 172 | +} |
| 173 | + |
| 174 | +/** |
| 175 | + * Given the configuration of 'info', walk the commits based on 'info->revs' and |
| 176 | + * call 'info->path_fn' on each discovered path. |
| 177 | + * |
| 178 | + * Returns nonzero on an error. |
| 179 | + */ |
| 180 | +int walk_objects_by_path(struct path_walk_info *info) |
| 181 | +{ |
| 182 | + const char *root_path = ""; |
| 183 | + int ret = 0; |
| 184 | + size_t commits_nr = 0, paths_nr = 0; |
| 185 | + struct commit *c; |
| 186 | + struct type_and_oid_list *root_tree_list; |
| 187 | + struct path_walk_context ctx = { |
| 188 | + .repo = info->revs->repo, |
| 189 | + .revs = info->revs, |
| 190 | + .info = info, |
| 191 | + .path_stack = STRING_LIST_INIT_DUP, |
| 192 | + .paths_to_lists = STRMAP_INIT |
| 193 | + }; |
| 194 | + |
| 195 | + trace2_region_enter("path-walk", "commit-walk", info->revs->repo); |
| 196 | + |
| 197 | + /* Insert a single list for the root tree into the paths. */ |
| 198 | + CALLOC_ARRAY(root_tree_list, 1); |
| 199 | + root_tree_list->type = OBJ_TREE; |
| 200 | + strmap_put(&ctx.paths_to_lists, root_path, root_tree_list); |
| 201 | + |
| 202 | + if (prepare_revision_walk(info->revs)) |
| 203 | + die(_("failed to setup revision walk")); |
| 204 | + |
| 205 | + while ((c = get_revision(info->revs))) { |
| 206 | + struct object_id *oid = get_commit_tree_oid(c); |
| 207 | + struct tree *t = lookup_tree(info->revs->repo, oid); |
| 208 | + commits_nr++; |
| 209 | + |
| 210 | + if (t) |
| 211 | + oid_array_append(&root_tree_list->oids, oid); |
| 212 | + else |
| 213 | + warning("could not find tree %s", oid_to_hex(oid)); |
| 214 | + } |
| 215 | + |
| 216 | + trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr); |
| 217 | + trace2_region_leave("path-walk", "commit-walk", info->revs->repo); |
| 218 | + |
| 219 | + string_list_append(&ctx.path_stack, root_path); |
| 220 | + |
| 221 | + trace2_region_enter("path-walk", "path-walk", info->revs->repo); |
| 222 | + while (!ret && ctx.path_stack.nr) { |
| 223 | + char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string; |
| 224 | + ctx.path_stack.nr--; |
| 225 | + paths_nr++; |
| 226 | + |
| 227 | + ret = walk_path(&ctx, path); |
| 228 | + |
| 229 | + free(path); |
| 230 | + } |
| 231 | + trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr); |
| 232 | + trace2_region_leave("path-walk", "path-walk", info->revs->repo); |
| 233 | + |
| 234 | + clear_strmap(&ctx.paths_to_lists); |
| 235 | + string_list_clear(&ctx.path_stack, 0); |
| 236 | + return ret; |
| 237 | +} |
0 commit comments