Skip to content

Commit 8ce6fd0

Browse files
committed
bundle-uri: fetch a list of bundles
When the content at a given bundle URI is not understood as a bundle (based on inspecting the initial content), then Git currently gives up and ignores that content. Independent bundle providers may want to split up the bundle content into multiple bundles, but still make them available from a single URI. Teach Git to attempt parsing the bundle URI content as a Git config file providing the key=value pairs for a bundle list. Git then looks at the mode of the list to see if ANY single bundle is sufficient or if ALL bundles are required. The content at the selected URIs is downloaded and the content is inspected again, creating a recursive process. To guard the recursion against malformed or malicious content, limit the recursion depth to a reasonable four for now. This can be converted to a configured value in the future if necessary. The value of four is twice as high as expected to be useful (a bundle list is unlikely to point to more bundle lists). To test this scenario, create an interesting bundle topology where three incremental bundles are built on top of a single full bundle. By using a merge commit, the two middle bundles are "independent" in that they do not require each other in order to unbundle themselves. They each only need the base bundle. The bundle containing the merge commit requires both of the middle bundles, though. This leads to some interesting decisions when unbundling, especially when we later implement heuristics that promote downloading bundles until the prerequisite commits are satisfied. Signed-off-by: Derrick Stolee <[email protected]>
1 parent 3c3460d commit 8ce6fd0

File tree

3 files changed

+293
-17
lines changed

3 files changed

+293
-17
lines changed

bundle-uri.c

Lines changed: 194 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,10 @@ void init_bundle_list(struct bundle_list *list)
3535
static int clear_remote_bundle_info(struct remote_bundle_info *bundle,
3636
void *data)
3737
{
38-
free(bundle->id);
39-
free(bundle->uri);
38+
FREE_AND_NULL(bundle->id);
39+
FREE_AND_NULL(bundle->uri);
4040
strbuf_release(&bundle->file);
41+
bundle->unbundled = 0;
4142
return 0;
4243
}
4344

@@ -334,52 +335,228 @@ static int unbundle_from_file(struct repository *r, const char *file)
334335
return result;
335336
}
336337

338+
struct bundle_list_context {
339+
struct repository *r;
340+
struct bundle_list *list;
341+
enum bundle_list_mode mode;
342+
int count;
343+
int depth;
344+
};
345+
346+
/*
347+
* This early definition is necessary because we use indirect recursion:
348+
*
349+
* While iterating through a bundle list that was downloaded as part
350+
* of fetch_bundle_uri_internal(), iterator methods eventually call it
351+
* again, but with depth + 1.
352+
*/
353+
static int fetch_bundle_uri_internal(struct repository *r,
354+
struct remote_bundle_info *bundle,
355+
int depth,
356+
struct bundle_list *list);
357+
358+
static int download_bundle_to_file(struct remote_bundle_info *bundle, void *data)
359+
{
360+
struct bundle_list_context *ctx = data;
361+
362+
if (ctx->mode == BUNDLE_MODE_ANY && ctx->count)
363+
return 0;
364+
365+
ctx->count++;
366+
return fetch_bundle_uri_internal(ctx->r, bundle, ctx->depth + 1, ctx->list);
367+
}
368+
369+
static int download_bundle_list(struct repository *r,
370+
struct bundle_list *local_list,
371+
struct bundle_list *global_list,
372+
int depth)
373+
{
374+
struct bundle_list_context ctx = {
375+
.r = r,
376+
.list = global_list,
377+
.depth = depth + 1,
378+
.mode = local_list->mode,
379+
};
380+
381+
return for_all_bundles_in_list(local_list, download_bundle_to_file, &ctx);
382+
}
383+
384+
static int fetch_bundle_list_in_config_format(struct repository *r,
385+
struct bundle_list *global_list,
386+
struct remote_bundle_info *bundle,
387+
int depth)
388+
{
389+
int result;
390+
struct bundle_list list_from_bundle;
391+
392+
init_bundle_list(&list_from_bundle);
393+
394+
if ((result = parse_bundle_list_in_config_format(bundle->uri,
395+
bundle->file.buf,
396+
&list_from_bundle)))
397+
goto cleanup;
398+
399+
if (list_from_bundle.mode == BUNDLE_MODE_NONE) {
400+
warning(_("unrecognized bundle mode from URI '%s'"),
401+
bundle->uri);
402+
result = -1;
403+
goto cleanup;
404+
}
405+
406+
if ((result = download_bundle_list(r, &list_from_bundle,
407+
global_list, depth)))
408+
goto cleanup;
409+
410+
cleanup:
411+
clear_bundle_list(&list_from_bundle);
412+
return result;
413+
}
414+
337415
/**
338416
* This limits the recursion on fetch_bundle_uri_internal() when following
339417
* bundle lists.
340418
*/
341419
static int max_bundle_uri_depth = 4;
342420

421+
/**
422+
* Recursively download all bundles advertised at the given URI
423+
* to files. If the file is a bundle, then add it to the given
424+
* 'list'. Otherwise, expect a bundle list and recurse on the
425+
* URIs in that list according to the list mode (ANY or ALL).
426+
*/
343427
static int fetch_bundle_uri_internal(struct repository *r,
344-
const char *uri,
345-
int depth)
428+
struct remote_bundle_info *bundle,
429+
int depth,
430+
struct bundle_list *list)
346431
{
347432
int result = 0;
348-
struct strbuf filename = STRBUF_INIT;
433+
struct remote_bundle_info *bcopy;
349434

350435
if (depth >= max_bundle_uri_depth) {
351436
warning(_("exceeded bundle URI recursion limit (%d)"),
352437
max_bundle_uri_depth);
353438
return -1;
354439
}
355440

356-
if ((result = find_temp_filename(&filename)))
441+
if (!bundle->file.len &&
442+
(result = find_temp_filename(&bundle->file)))
357443
goto cleanup;
358444

359-
if ((result = copy_uri_to_file(filename.buf, uri))) {
360-
warning(_("failed to download bundle from URI '%s'"), uri);
445+
if ((result = copy_uri_to_file(bundle->file.buf, bundle->uri))) {
446+
warning(_("failed to download bundle from URI '%s'"), bundle->uri);
361447
goto cleanup;
362448
}
363449

364-
if ((result = !is_bundle(filename.buf, 0))) {
365-
warning(_("file at URI '%s' is not a bundle"), uri);
450+
if ((result = !is_bundle(bundle->file.buf, 1))) {
451+
result = fetch_bundle_list_in_config_format(
452+
r, list, bundle, depth);
453+
if (result)
454+
warning(_("file at URI '%s' is not a bundle or bundle list"),
455+
bundle->uri);
366456
goto cleanup;
367457
}
368458

369-
if ((result = unbundle_from_file(r, filename.buf))) {
370-
warning(_("failed to unbundle bundle from URI '%s'"), uri);
371-
goto cleanup;
372-
}
459+
/* Copy the bundle and insert it into the global list. */
460+
CALLOC_ARRAY(bcopy, 1);
461+
bcopy->id = xstrdup(bundle->id);
462+
strbuf_init(&bcopy->file, 0);
463+
strbuf_add(&bcopy->file, bundle->file.buf, bundle->file.len);
464+
hashmap_entry_init(&bcopy->ent, strhash(bcopy->id));
465+
hashmap_add(&list->bundles, &bcopy->ent);
373466

374467
cleanup:
375-
unlink(filename.buf);
376-
strbuf_release(&filename);
468+
if (result)
469+
unlink(bundle->file.buf);
377470
return result;
378471
}
379472

473+
struct attempt_unbundle_context {
474+
struct repository *r;
475+
int success_count;
476+
int failure_count;
477+
};
478+
479+
static int attempt_unbundle(struct remote_bundle_info *info, void *data)
480+
{
481+
struct attempt_unbundle_context *ctx = data;
482+
483+
if (info->unbundled || !unbundle_from_file(ctx->r, info->file.buf)) {
484+
ctx->success_count++;
485+
info->unbundled = 1;
486+
} else {
487+
ctx->failure_count++;
488+
}
489+
490+
return 0;
491+
}
492+
493+
static int unbundle_all_bundles(struct repository *r,
494+
struct bundle_list *list)
495+
{
496+
int last_success_count = -1;
497+
struct attempt_unbundle_context ctx = {
498+
.r = r,
499+
};
500+
501+
/*
502+
* Iterate through all bundles looking for ones that can
503+
* successfully unbundle. If any succeed, then perhaps another
504+
* will succeed in the next attempt.
505+
*/
506+
while (last_success_count < ctx.success_count) {
507+
last_success_count = ctx.success_count;
508+
509+
ctx.success_count = 0;
510+
ctx.failure_count = 0;
511+
for_all_bundles_in_list(list, attempt_unbundle, &ctx);
512+
}
513+
514+
if (ctx.success_count)
515+
git_config_set_multivar_gently("log.excludedecoration",
516+
"refs/bundle/",
517+
"refs/bundle/",
518+
CONFIG_FLAGS_FIXED_VALUE |
519+
CONFIG_FLAGS_MULTI_REPLACE);
520+
521+
if (ctx.failure_count)
522+
warning(_("failed to unbundle %d bundles"),
523+
ctx.failure_count);
524+
525+
return 0;
526+
}
527+
528+
static int unlink_bundle(struct remote_bundle_info *info, void *data)
529+
{
530+
if (info->file.buf)
531+
unlink_or_warn(info->file.buf);
532+
return 0;
533+
}
534+
380535
int fetch_bundle_uri(struct repository *r, const char *uri)
381536
{
382-
return fetch_bundle_uri_internal(r, uri, 0);
537+
int result;
538+
struct bundle_list list;
539+
struct remote_bundle_info bundle = {
540+
.uri = xstrdup(uri),
541+
.id = xstrdup("<root>"),
542+
.file = STRBUF_INIT,
543+
};
544+
545+
init_bundle_list(&list);
546+
547+
/* If a bundle is added to this global list, then it is required. */
548+
list.mode = BUNDLE_MODE_ALL;
549+
550+
if ((result = fetch_bundle_uri_internal(r, &bundle, 0, &list)))
551+
goto cleanup;
552+
553+
result = unbundle_all_bundles(r, &list);
554+
555+
cleanup:
556+
for_all_bundles_in_list(&list, unlink_bundle, NULL);
557+
clear_bundle_list(&list);
558+
clear_remote_bundle_info(&bundle, NULL);
559+
return result;
383560
}
384561

385562
/**

bundle-uri.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ struct remote_bundle_info {
3535
* an empty string.
3636
*/
3737
struct strbuf file;
38+
39+
/**
40+
* If the bundle has been unbundled successfully, then
41+
* this boolean is true.
42+
*/
43+
unsigned unbundled:1;
3844
};
3945

4046
#define REMOTE_BUNDLE_INFO_INIT { \

t/t5558-clone-bundle-uri.sh

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,72 @@ test_expect_success 'clone with file:// bundle' '
4141
test_cmp expect actual
4242
'
4343

44+
# To get interesting tests for bundle lists, we need to construct a
45+
# somewhat-interesting commit history.
46+
#
47+
# ---------------- bundle-4
48+
#
49+
# 4
50+
# / \
51+
# ----|---|------- bundle-3
52+
# | |
53+
# | 3
54+
# | |
55+
# ----|---|------- bundle-2
56+
# | |
57+
# 2 |
58+
# | |
59+
# ----|---|------- bundle-1
60+
# \ /
61+
# 1
62+
# |
63+
# (previous commits)
64+
test_expect_success 'construct incremental bundle list' '
65+
(
66+
cd clone-from &&
67+
git checkout -b base &&
68+
test_commit 1 &&
69+
git checkout -b left &&
70+
test_commit 2 &&
71+
git checkout -b right base &&
72+
test_commit 3 &&
73+
git checkout -b merge left &&
74+
git merge right -m "4" &&
75+
76+
git bundle create bundle-1.bundle base &&
77+
git bundle create bundle-2.bundle base..left &&
78+
git bundle create bundle-3.bundle base..right &&
79+
git bundle create bundle-4.bundle merge --not left right
80+
)
81+
'
82+
83+
test_expect_success 'clone bundle list (file, no heuristic)' '
84+
cat >bundle-list <<-EOF &&
85+
[bundle]
86+
version = 1
87+
mode = all
88+
89+
[bundle "bundle-1"]
90+
uri = file://$(pwd)/clone-from/bundle-1.bundle
91+
92+
[bundle "bundle-2"]
93+
uri = file://$(pwd)/clone-from/bundle-2.bundle
94+
95+
[bundle "bundle-3"]
96+
uri = file://$(pwd)/clone-from/bundle-3.bundle
97+
98+
[bundle "bundle-4"]
99+
uri = file://$(pwd)/clone-from/bundle-4.bundle
100+
EOF
101+
102+
git clone --bundle-uri="file://$(pwd)/bundle-list" . clone-list-file &&
103+
for oid in $(git -C clone-from for-each-ref --format="%(objectname)")
104+
do
105+
git -C clone-list-file rev-parse $oid || return 1
106+
done
107+
'
108+
109+
44110
#########################################################################
45111
# HTTP tests begin here
46112

@@ -75,6 +141,33 @@ test_expect_success 'clone HTTP bundle' '
75141
test_config -C clone-http log.excludedecoration refs/bundle/
76142
'
77143

144+
test_expect_success 'clone bundle list (HTTP, no heuristic)' '
145+
cp clone-from/bundle-*.bundle "$HTTPD_DOCUMENT_ROOT_PATH/" &&
146+
cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
147+
[bundle]
148+
version = 1
149+
mode = all
150+
151+
[bundle "bundle-1"]
152+
uri = $HTTPD_URL/bundle-1.bundle
153+
154+
[bundle "bundle-2"]
155+
uri = $HTTPD_URL/bundle-2.bundle
156+
157+
[bundle "bundle-3"]
158+
uri = $HTTPD_URL/bundle-3.bundle
159+
160+
[bundle "bundle-4"]
161+
uri = $HTTPD_URL/bundle-4.bundle
162+
EOF
163+
164+
git clone --bundle-uri="$HTTPD_URL/bundle-list" . clone-list-http &&
165+
for oid in $(git -C clone-from for-each-ref --format="%(objectname)")
166+
do
167+
git -C clone-list-http rev-parse $oid || return 1
168+
done
169+
'
170+
78171
# Do not add tests here unless they use the HTTP server, as they will
79172
# not run unless the HTTP dependencies exist.
80173

0 commit comments

Comments
 (0)