diff --git a/.gitignore b/.gitignore
index 8caf3700c2305d..3f6fdb31a5ea32 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,7 @@
 /git-apply
 /git-archimport
 /git-archive
+/git-backfill
 /git-bisect
 /git-blame
 /git-branch
@@ -164,6 +165,7 @@
 /git-submodule
 /git-submodule--helper
 /git-subtree
+/git-survey
 /git-svn
 /git-switch
 /git-symbolic-ref
diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt
index da527377fafcb6..08d06271177006 100644
--- a/Documentation/config/pack.txt
+++ b/Documentation/config/pack.txt
@@ -155,6 +155,14 @@ pack.useSparse::
 	commits contain certain types of direct renames. Default is
 	`true`.
 
+pack.usePathWalk::
+	When true, git will default to using the '--path-walk' option in
+	'git pack-objects' when the '--revs' option is present. This
+	algorithm groups objects by path to maximize the ability to
+	compute delta chains across historical versions of the same
+	object. This may disable other options, such as using bitmaps to
+	enumerate objects.
+
 pack.preferBitmapTips::
 	When selecting which commits will receive bitmaps, prefer a
 	commit at the tip of any reference that is a suffix of any value
diff --git a/Documentation/git-backfill.txt b/Documentation/git-backfill.txt
new file mode 100644
index 00000000000000..066ec6b161a22c
--- /dev/null
+++ b/Documentation/git-backfill.txt
@@ -0,0 +1,60 @@
+git-backfill(1)
+===============
+
+NAME
+----
+git-backfill - Download missing objects in a partial clone
+
+
+SYNOPSIS
+--------
+[verse]
+'git backfill' [--batch-size=<n>] [--[no-]sparse]
+
+DESCRIPTION
+-----------
+
+Blobless partial clones are created using `git clone --filter=blob:none`,
+which configures the local repository such that the Git client avoids
+downloading blob objects unless they are required for a local operation.
+This initially means that the clone and later fetches download reachable
+commits and trees but no blobs. Later operations that change the `HEAD`
+pointer, such as `git checkout` or `git merge`, may need to download
+missing blobs in order to complete their operation.
+
+In the worst cases, commands that compute blob diffs, such as `git blame`,
+become very slow as they download the missing blobs in single-blob
+requests to satisfy the missing object as the Git command needs it. This
+leads to multiple download requests and no ability for the Git server to
+provide delta compression across those objects.
+
+The `git backfill` command provides a way for the user to request that
+Git downloads the missing blobs (with optional filters) such that the
+missing blobs representing historical versions of files can be downloaded
+in batches. The `backfill` command attempts to optimize the request by
+grouping blobs that appear at the same path, hopefully leading to good
+delta compression in the packfile sent by the server.
+
+By default, `git backfill` downloads all blobs reachable from the `HEAD`
+commit. This set can be restricted or expanded using various options.
+
+OPTIONS
+-------
+
+--batch-size=<n>::
+	Specify a minimum size for a batch of missing objects to request
+	from the server. This size may be exceeded by the last set of
+	blobs seen at a given path. Default batch size is 16,000.
+
+--[no-]sparse::
+	Only download objects if they appear at a path that matches the
+	current sparse-checkout. If the sparse-checkout feature is enabled,
+	then `--sparse` is assumed and can be disabled with `--no-sparse`.
+
+SEE ALSO
+--------
+linkgit:git-clone[1].
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Documentation/git-survey.txt b/Documentation/git-survey.txt
new file mode 100644
index 00000000000000..c648ef704e3806
--- /dev/null
+++ b/Documentation/git-survey.txt
@@ -0,0 +1,70 @@
+git-survey(1)
+=============
+
+NAME
+----
+git-survey - EXPERIMENTAL: Measure various repository dimensions of scale
+
+SYNOPSIS
+--------
+[verse]
+(EXPERIMENTAL!) `git survey` <options>
+
+DESCRIPTION
+-----------
+
+Survey the repository and measure various dimensions of scale.
+
+As repositories grow to "monorepo" size, certain data shapes can cause
+performance problems.  `git-survey` attempts to measure and report on
+known problem areas.
+
+Ref Selection and Reachable Objects
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In this first analysis phase, `git survey` will iterate over the set of
+requested branches, tags, and other refs and treewalk over all of the
+reachable commits, trees, and blobs and generate various statistics.
+
+OPTIONS
+-------
+
+--progress::
+	Show progress.  This is automatically enabled when interactive.
+
+Ref Selection
+~~~~~~~~~~~~~
+
+The following options control the set of refs that `git survey` will examine.
+By default, `git survey` will look at tags, local branches, and remote refs.
+If any of the following options are given, the default set is cleared and
+only refs for the given options are added.
+
+--all-refs::
+	Use all refs.  This includes local branches, tags, remote refs,
+	notes, and stashes.  This option overrides all of the following.
+
+--branches::
+	Add local branches (`refs/heads/`) to the set.
+
+--tags::
+	Add tags (`refs/tags/`) to the set.
+
+--remotes::
+	Add remote branches (`refs/remotes/`) to the set.
+
+--detached::
+	Add HEAD to the set.
+
+--other::
+	Add notes (`refs/notes/`) and stashes (`refs/stash/`) to the set.
+
+OUTPUT
+------
+
+By default, `git survey` will print information about the repository in a
+human-readable format that includes overviews and tables.
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Makefile b/Makefile
index deb175a0408782..462aff65a5089e 100644
--- a/Makefile
+++ b/Makefile
@@ -808,6 +808,7 @@ TEST_BUILTINS_OBJS += test-lazy-init-name-hash.o
 TEST_BUILTINS_OBJS += test-match-trees.o
 TEST_BUILTINS_OBJS += test-mergesort.o
 TEST_BUILTINS_OBJS += test-mktemp.o
+TEST_BUILTINS_OBJS += test-name-hash.o
 TEST_BUILTINS_OBJS += test-oid-array.o
 TEST_BUILTINS_OBJS += test-online-cpus.o
 TEST_BUILTINS_OBJS += test-pack-mtimes.o
@@ -1090,6 +1091,7 @@ LIB_OBJS += parse-options.o
 LIB_OBJS += patch-delta.o
 LIB_OBJS += patch-ids.o
 LIB_OBJS += path.o
+LIB_OBJS += path-walk.o
 LIB_OBJS += pathspec.o
 LIB_OBJS += pkt-line.o
 LIB_OBJS += preload-index.o
@@ -1197,6 +1199,7 @@ BUILTIN_OBJS += builtin/am.o
 BUILTIN_OBJS += builtin/annotate.o
 BUILTIN_OBJS += builtin/apply.o
 BUILTIN_OBJS += builtin/archive.o
+BUILTIN_OBJS += builtin/backfill.o
 BUILTIN_OBJS += builtin/bisect.o
 BUILTIN_OBJS += builtin/blame.o
 BUILTIN_OBJS += builtin/branch.o
@@ -1301,6 +1304,7 @@ BUILTIN_OBJS += builtin/sparse-checkout.o
 BUILTIN_OBJS += builtin/stash.o
 BUILTIN_OBJS += builtin/stripspace.o
 BUILTIN_OBJS += builtin/submodule--helper.o
+BUILTIN_OBJS += builtin/survey.o
 BUILTIN_OBJS += builtin/symbolic-ref.o
 BUILTIN_OBJS += builtin/tag.o
 BUILTIN_OBJS += builtin/unpack-file.o
diff --git a/builtin.h b/builtin.h
index 14fa0171607b17..d4e8cf3b97b590 100644
--- a/builtin.h
+++ b/builtin.h
@@ -127,6 +127,7 @@ int cmd_am(int argc, const char **argv, const char *prefix);
 int cmd_annotate(int argc, const char **argv, const char *prefix);
 int cmd_apply(int argc, const char **argv, const char *prefix);
 int cmd_archive(int argc, const char **argv, const char *prefix);
+int cmd_backfill(int argc, const char **argv, const char *prefix);
 int cmd_bisect(int argc, const char **argv, const char *prefix);
 int cmd_blame(int argc, const char **argv, const char *prefix);
 int cmd_branch(int argc, const char **argv, const char *prefix);
@@ -238,6 +239,7 @@ int cmd_status(int argc, const char **argv, const char *prefix);
 int cmd_stash(int argc, const char **argv, const char *prefix);
 int cmd_stripspace(int argc, const char **argv, const char *prefix);
 int cmd_submodule__helper(int argc, const char **argv, const char *prefix);
+int cmd_survey(int argc, const char **argv, const char *prefix);
 int cmd_switch(int argc, const char **argv, const char *prefix);
 int cmd_symbolic_ref(int argc, const char **argv, const char *prefix);
 int cmd_tag(int argc, const char **argv, const char *prefix);
diff --git a/builtin/backfill.c b/builtin/backfill.c
new file mode 100644
index 00000000000000..2a1b043f18821a
--- /dev/null
+++ b/builtin/backfill.c
@@ -0,0 +1,141 @@
+#include "builtin.h"
+#include "git-compat-util.h"
+#include "config.h"
+#include "parse-options.h"
+#include "repository.h"
+#include "commit.h"
+#include "dir.h"
+#include "environment.h"
+#include "hex.h"
+#include "tree.h"
+#include "tree-walk.h"
+#include "object.h"
+#include "object-store-ll.h"
+#include "oid-array.h"
+#include "oidset.h"
+#include "promisor-remote.h"
+#include "strmap.h"
+#include "string-list.h"
+#include "revision.h"
+#include "trace2.h"
+#include "progress.h"
+#include "packfile.h"
+#include "path-walk.h"
+
+static const char * const builtin_backfill_usage[] = {
+	N_("git backfill [--batch-size=<n>] [--[no-]sparse]"),
+	NULL
+};
+
+struct backfill_context {
+	struct repository *repo;	/* used for the revision walk and the fetch */
+	struct oid_array current_batch;	/* blob OIDs queued for the next download */
+	size_t batch_size;		/* download once current_batch.nr reaches this */
+	int sparse;			/* nonzero: load sparse-checkout patterns for the walk */
+};
+
+static void clear_backfill_context(struct backfill_context *ctx)
+{
+	oid_array_clear(&ctx->current_batch);
+}
+
+static void download_batch(struct backfill_context *ctx)
+{
+	/* Request every queued OID in one promisor_remote_get_direct() call. */
+	promisor_remote_get_direct(ctx->repo,
+				   ctx->current_batch.oid,
+				   ctx->current_batch.nr);
+	oid_array_clear(&ctx->current_batch);
+	/*
+	 * We likely have a new packfile. Add it to the packed list to
+	 * avoid possible duplicate downloads of the same objects.
+	 */
+	reprepare_packed_git(ctx->repo);
+}
+
+static int fill_missing_blobs(const char *path UNUSED,
+			      struct oid_array *list,
+			      enum object_type type,
+			      void *data)
+{
+	struct backfill_context *ctx = data;
+
+	if (type != OBJ_BLOB)
+		BUG("fill_missing_blobs only takes blob objects");
+
+	/* Queue each blob whose lookup fails or reports a zero on-disk size. */
+	for (size_t i = 0; i < list->nr; i++) {
+		off_t size = 0;
+		struct object_info info = OBJECT_INFO_INIT;
+		info.disk_sizep = &size;
+		if (oid_object_info_extended(ctx->repo,
+					     &list->oid[i],
+					     &info,
+					     OBJECT_INFO_FOR_PREFETCH) ||
+		    !size)
+			oid_array_append(&ctx->current_batch, &list->oid[i]);
+	}
+	/* Flush only after the whole list, so batch_size may be exceeded. */
+	if (ctx->current_batch.nr >= ctx->batch_size)
+		download_batch(ctx);
+
+	return 0;
+}
+
+static int do_backfill(struct backfill_context *ctx)
+{
+	struct rev_info revs;
+	struct path_walk_info info = PATH_WALK_INFO_INIT;
+	int ret;
+
+	if (ctx->sparse) {
+		CALLOC_ARRAY(info.pl, 1); /* NOTE(review): never freed in this function */
+		if (get_sparse_checkout_patterns(info.pl))
+			return error(_("problem loading sparse-checkout"));
+	}
+	/* Walk from HEAD by path; NOTE(review): 'revs' is never released here. */
+	repo_init_revisions(ctx->repo, &revs, "");
+	handle_revision_arg("HEAD", &revs, 0, 0);
+
+	info.revs = &revs;
+	info.path_fn = fill_missing_blobs;
+	info.path_fn_data = ctx;
+	/* fill_missing_blobs() receives 'ctx' through path_fn_data. */
+	ret = walk_objects_by_path(&info);
+
+	/* Download the objects that did not fill a batch. */
+	if (!ret)
+		download_batch(ctx);
+
+	clear_backfill_context(ctx);
+	return ret;
+}
+
+int cmd_backfill(int argc, const char **argv, const char *prefix)
+{
+	struct backfill_context ctx = {
+		.repo = the_repository,
+		.current_batch = OID_ARRAY_INIT,
+		.batch_size = 16000, /* NOTE(review): size_t filled via OPT_INTEGER - confirm */
+		.sparse = -1, /* unset: resolved from core_apply_sparse_checkout below */
+	};
+	struct option options[] = {
+		OPT_INTEGER(0, "batch-size", &ctx.batch_size,
+			    N_("Minimum number of objects to request at a time")),
+		OPT_BOOL(0, "sparse", &ctx.sparse,
+			 N_("Restrict the missing objects to the current sparse-checkout")),
+		OPT_END(),
+	};
+
+	if (argc == 2 && !strcmp(argv[1], "-h"))
+		usage_with_options(builtin_backfill_usage, options);
+
+	argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage,
+			     0);
+
+	git_config(git_default_config, NULL);
+	/* Neither --sparse nor --no-sparse was given: follow the repo setting. */
+	if (ctx.sparse < 0)
+		ctx.sparse = core_apply_sparse_checkout;
+
+	return do_backfill(&ctx);
+}
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 778be80f5646d1..7d1dd5a65577bc 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -39,6 +39,9 @@
 #include "promisor-remote.h"
 #include "pack-mtimes.h"
 #include "parse-options.h"
+#include "blob.h"
+#include "tree.h"
+#include "path-walk.h"
 
 /*
  * Objects we are going to pack are collected in the `to_pack` structure.
@@ -47,6 +50,9 @@
  */
 static struct packing_data to_pack;
 
+static FILE *delta_file;
+static int delta_file_nr;
+
 static inline struct object_entry *oe_delta(
 		const struct packing_data *pack,
 		const struct object_entry *e)
@@ -215,6 +221,7 @@ static int delta_search_threads;
 static int pack_to_stdout;
 static int sparse;
 static int thin;
+static int path_walk;
 static int num_preferred_base;
 static struct progress *progress_state;
 
@@ -266,6 +273,14 @@ struct configured_exclusion {
 static struct oidmap configured_exclusions;
 
 static struct oidset excluded_by_config;
+static int use_full_name_hash;
+
+/* Hash 'name' with whichever name-hash function use_full_name_hash selects. */
+static inline uint32_t pack_name_hash_fn(const char *name)
+{
+	return use_full_name_hash ? pack_full_name_hash(name)
+				  : pack_name_hash(name);
+}
 
 /*
  * stats
@@ -504,6 +519,14 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 	hdrlen = encode_in_pack_object_header(header, sizeof(header),
 					      type, size);
 
+	if (delta_file) {
+		if (delta_file_nr++)
+			fprintf(delta_file, ",\n");
+		fprintf(delta_file, "\t\t{\n");
+		fprintf(delta_file, "\t\t\t\"oid\" : \"%s\",\n", oid_to_hex(&entry->idx.oid));
+		fprintf(delta_file, "\t\t\t\"size\" : %"PRIuMAX",\n", datalen);
+	}
+
 	if (type == OBJ_OFS_DELTA) {
 		/*
 		 * Deltas with relative base contain an additional
@@ -524,6 +547,11 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 		hashwrite(f, header, hdrlen);
 		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
 		hdrlen += sizeof(dheader) - pos;
+		if (delta_file) {
+			fprintf(delta_file, "\t\t\t\"delta_type\" : \"OFS\",\n");
+			fprintf(delta_file, "\t\t\t\"offset\" : %"PRIuMAX",\n", ofs);
+			fprintf(delta_file, "\t\t\t\"delta_base\" : \"%s\",\n", oid_to_hex(&DELTA(entry)->idx.oid));
+		}
 	} else if (type == OBJ_REF_DELTA) {
 		/*
 		 * Deltas with a base reference contain
@@ -538,6 +566,10 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 		hashwrite(f, header, hdrlen);
 		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
 		hdrlen += hashsz;
+		if (delta_file) {
+			fprintf(delta_file, "\t\t\t\"delta_type\" : \"REF\",\n");
+			fprintf(delta_file, "\t\t\t\"delta_base\" : \"%s\",\n", oid_to_hex(&DELTA(entry)->idx.oid));
+		}
 	} else {
 		if (limit && hdrlen + datalen + hashsz >= limit) {
 			if (st)
@@ -547,6 +579,10 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 		}
 		hashwrite(f, header, hdrlen);
 	}
+
+	if (delta_file)
+		fprintf(delta_file, "\t\t\t\"reused\" : false\n\t\t}");
+
 	if (st) {
 		datalen = write_large_blob_data(st, f, &entry->idx.oid);
 		close_istream(st);
@@ -607,6 +643,14 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 		return write_no_reuse_object(f, entry, limit, usable_delta);
 	}
 
+	if (delta_file) {
+		if (delta_file_nr++)
+			fprintf(delta_file, ",\n");
+		fprintf(delta_file, "\t\t{\n");
+		fprintf(delta_file, "\t\t\t\"oid\" : \"%s\",\n", oid_to_hex(&entry->idx.oid));
+		fprintf(delta_file, "\t\t\t\"size\" : %"PRIuMAX",\n", entry_size);
+	}
+
 	if (type == OBJ_OFS_DELTA) {
 		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
 		unsigned pos = sizeof(dheader) - 1;
@@ -621,6 +665,12 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
 		hdrlen += sizeof(dheader) - pos;
 		reused_delta++;
+
+		if (delta_file) {
+			fprintf(delta_file, "\t\t\t\"delta_type\" : \"OFS\",\n");
+			fprintf(delta_file, "\t\t\t\"offset\" : %"PRIuMAX",\n", ofs);
+			fprintf(delta_file, "\t\t\t\"delta_base\" : \"%s\",\n", oid_to_hex(&DELTA(entry)->idx.oid));
+		}
 	} else if (type == OBJ_REF_DELTA) {
 		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
 			unuse_pack(&w_curs);
@@ -630,6 +680,10 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
 		hdrlen += hashsz;
 		reused_delta++;
+		if (delta_file) {
+			fprintf(delta_file, "\t\t\t\"delta_type\" : \"REF\",\n");
+			fprintf(delta_file, "\t\t\t\"delta_base\" : \"%s\",\n", oid_to_hex(&DELTA(entry)->idx.oid));
+		}
 	} else {
 		if (limit && hdrlen + datalen + hashsz >= limit) {
 			unuse_pack(&w_curs);
@@ -640,6 +694,10 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 	copy_pack_data(f, p, &w_curs, offset, datalen);
 	unuse_pack(&w_curs);
 	reused++;
+
+	if (delta_file)
+		fprintf(delta_file, "\t\t\t\"reused\" : true\n\t\t}");
+
 	return hdrlen + datalen;
 }
 
@@ -1252,6 +1310,11 @@ static void write_pack_file(void)
 	ALLOC_ARRAY(written_list, to_pack.nr_objects);
 	write_order = compute_write_order();
 
+	if (delta_file) {
+		fprintf(delta_file, "{\n\t\"num_objects\" : %"PRIu32",\n", to_pack.nr_objects);
+		fprintf(delta_file, "\t\"objects\" : [\n");
+	}
+
 	do {
 		unsigned char hash[GIT_MAX_RAWSZ];
 		char *pack_tmp_name = NULL;
@@ -1400,6 +1463,9 @@ static void write_pack_file(void)
 		    written, nr_result);
 	trace2_data_intmax("pack-objects", the_repository,
 			   "write_pack_file/wrote", nr_result);
+
+	if (delta_file)
+		fprintf(delta_file, "\n\t]\n}");
 }
 
 static int no_try_delta(const char *path)
@@ -1670,7 +1736,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
 		return 0;
 	}
 
-	create_object_entry(oid, type, pack_name_hash(name),
+	create_object_entry(oid, type, pack_name_hash_fn(name),
 			    exclude, name && no_try_delta(name),
 			    found_pack, found_offset);
 	return 1;
@@ -1884,7 +1950,7 @@ static void add_preferred_base_object(const char *name)
 {
 	struct pbase_tree *it;
 	size_t cmplen;
-	unsigned hash = pack_name_hash(name);
+	unsigned hash = pack_name_hash_fn(name);
 
 	if (!num_preferred_base || check_pbase_path(hash))
 		return;
@@ -3139,6 +3205,35 @@ static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, cons
 	return 0;
 }
 
+/* Return 1 if 'entry' should be considered in the delta search, else 0. */
+static int should_attempt_deltas(struct object_entry *entry)
+{
+	/*
+	 * An existing delta from a pack was chosen for reuse;
+	 * "reuse_delta &&" is implied.
+	 */
+	if (DELTA(entry))
+		return 0;
+
+	if (!entry->type_valid ||
+	    oe_size_less_than(&to_pack, entry, 50) ||
+	    entry->no_try_delta)
+		return 0;
+
+	if (oe_type(entry) < 0) {
+		/*
+		 * A missing preferred base is tolerated, since we do not
+		 * have to include it anyway; any other entry is fatal.
+		 */
+		if (!entry->preferred_base)
+			die(_("unable to get type of object %s"),
+				oid_to_hex(&entry->idx.oid));
+		return 0;
+	}
+
+	return 1;
+}
+
 static void prepare_pack(int window, int depth)
 {
 	struct object_entry **delta_list;
@@ -3169,33 +3264,11 @@ static void prepare_pack(int window, int depth)
 	for (i = 0; i < to_pack.nr_objects; i++) {
 		struct object_entry *entry = to_pack.objects + i;
 
-		if (DELTA(entry))
-			/* This happens if we decided to reuse existing
-			 * delta from a pack.  "reuse_delta &&" is implied.
-			 */
-			continue;
-
-		if (!entry->type_valid ||
-		    oe_size_less_than(&to_pack, entry, 50))
-			continue;
-
-		if (entry->no_try_delta)
+		if (!should_attempt_deltas(entry))
 			continue;
 
-		if (!entry->preferred_base) {
+		if (!entry->preferred_base)
 			nr_deltas++;
-			if (oe_type(entry) < 0)
-				die(_("unable to get type of object %s"),
-				    oid_to_hex(&entry->idx.oid));
-		} else {
-			if (oe_type(entry) < 0) {
-				/*
-				 * This object is not found, but we
-				 * don't have to include it anyway.
-				 */
-				continue;
-			}
-		}
 
 		delta_list[n++] = entry;
 	}
@@ -3394,7 +3467,7 @@ static void show_object_pack_hint(struct object *object, const char *name,
 	 * here using a now in order to perhaps improve the delta selection
 	 * process.
 	 */
-	oe->hash = pack_name_hash(name);
+	oe->hash = pack_name_hash_fn(name);
 	oe->no_try_delta = name && no_try_delta(name);
 
 	stdin_packs_hints_nr++;
@@ -3544,7 +3617,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
 	entry = packlist_find(&to_pack, oid);
 	if (entry) {
 		if (name) {
-			entry->hash = pack_name_hash(name);
+			entry->hash = pack_name_hash_fn(name);
 			entry->no_try_delta = no_try_delta(name);
 		}
 	} else {
@@ -3567,7 +3640,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
 			return;
 		}
 
-		entry = create_object_entry(oid, type, pack_name_hash(name),
+		entry = create_object_entry(oid, type, pack_name_hash_fn(name),
 					    0, name && no_try_delta(name),
 					    pack, offset);
 	}
@@ -4110,6 +4183,117 @@ static void mark_bitmap_preferred_tips(void)
 	}
 }
 
+/* Nonzero if 'oid' looks up as 'type' and is not marked UNINTERESTING. */
+static inline int is_oid_interesting(struct repository *repo,
+				     struct object_id *oid,
+				     enum object_type type)
+{
+	switch (type) {
+	case OBJ_TAG: {
+		struct tag *tag = lookup_tag(repo, oid);
+		return tag && !(tag->object.flags & UNINTERESTING);
+	}
+	case OBJ_COMMIT: {
+		struct commit *commit = lookup_commit(repo, oid);
+		return commit && !(commit->object.flags & UNINTERESTING);
+	}
+	case OBJ_TREE: {
+		struct tree *tree = lookup_tree(repo, oid);
+		return tree && !(tree->object.flags & UNINTERESTING);
+	}
+	case OBJ_BLOB: {
+		struct blob *blob = lookup_blob(repo, oid);
+		return blob && !(blob->object.flags & UNINTERESTING);
+	}
+	default:
+		return 0;
+	}
+}
+
+/* Path-walk callback: add the objects at 'path' and search them for deltas. */
+static int add_objects_by_path(const char *path,
+			       struct oid_array *oids,
+			       enum object_type type,
+			       void *data)
+{
+	unsigned int *processed = data;
+	size_t first_new = to_pack.nr_objects;
+	size_t nr_new;
+	unsigned int nr_candidates = 0;
+	struct object_entry **candidates;
+
+	/*
+	 * Register every object at this path in the packing data.
+	 * UNINTERESTING objects are added too (as 'exclude' entries)
+	 * because they can still serve as delta bases.
+	 */
+	for (size_t i = 0; i < oids->nr; i++) {
+		struct object_id *oid = &oids->oid[i];
+		add_object_entry(oid, type, path,
+				 !is_oid_interesting(the_repository, oid, type));
+	}
+
+	nr_new = to_pack.nr_objects - first_new;
+
+	/* Nothing was added, or the delta window is zero: we are done. */
+	if (!nr_new || !window)
+		return 0;
+
+	ALLOC_ARRAY(candidates, nr_new);
+
+	/* Keep only the new entries that should_attempt_deltas() accepts. */
+	for (size_t i = 0; i < nr_new; i++) {
+		struct object_entry *entry = &to_pack.objects[first_new + i];
+
+		if (should_attempt_deltas(entry))
+			candidates[nr_candidates++] = entry;
+	}
+
+	/*
+	 * Search for delta bases among the objects sharing this path.
+	 * Doing so here interleaves delta compression with the object
+	 * walk, which can lead to confusing progress indicators, and it
+	 * is incompatible with threaded delta calculations. In the
+	 * future, consider recording regions of to_pack.objects that
+	 * the threaded delta computation could pick up instead.
+	 */
+	if (nr_candidates) {
+		QSORT(candidates, nr_candidates, type_size_sort);
+		find_deltas(candidates, &nr_candidates, window, depth,
+			    processed);
+	}
+
+	free(candidates);
+	return 0;
+}
+
+/* Collect the objects to pack through walk_objects_by_path(); dies on failure. */
+static void get_object_list_path_walk(struct rev_info *revs)
+{
+	unsigned int processed = 0;
+	struct path_walk_info info = PATH_WALK_INFO_INIT;
+
+	/* Request all four object types and route them to add_objects_by_path(). */
+	revs->tag_objects = 1;
+	info.revs = revs;
+	info.path_fn = add_objects_by_path;
+	info.path_fn_data = &processed;
+	info.commits = 1;
+	info.tags = 1;
+	info.trees = 1;
+	info.blobs = 1;
+
+	/*
+	 * Let --[no-]sparse matter here, if only for testing purposes.
+	 * A path without any interesting object adds nothing to the
+	 * resulting pack; it would only create noisy preferred base
+	 * objects.
+	 */
+	info.prune_all_uninteresting = sparse;
+	if (walk_objects_by_path(&info))
+		die(_("failed to pack objects via path-walk"));
+}
+
 static void get_object_list(struct rev_info *revs, int ac, const char **av)
 {
 	struct setup_revision_opt s_r_opt = {
@@ -4156,7 +4340,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
 
 	warn_on_object_refname_ambiguity = save_warning;
 
-	if (use_bitmap_index && !get_object_list_from_bitmap(revs))
+	if (use_bitmap_index && !path_walk && !get_object_list_from_bitmap(revs))
 		return;
 
 	if (use_delta_islands)
@@ -4165,15 +4349,19 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
 	if (write_bitmap_index)
 		mark_bitmap_preferred_tips();
 
-	if (prepare_revision_walk(revs))
-		die(_("revision walk setup failed"));
-	mark_edges_uninteresting(revs, show_edge, sparse);
-
 	if (!fn_show_object)
 		fn_show_object = show_object;
-	traverse_commit_list(revs,
-			     show_commit, fn_show_object,
-			     NULL);
+
+	if (path_walk) {
+		get_object_list_path_walk(revs);
+	} else {
+		if (prepare_revision_walk(revs))
+			die(_("revision walk setup failed"));
+		mark_edges_uninteresting(revs, show_edge, sparse);
+		traverse_commit_list(revs,
+				show_commit, fn_show_object,
+				NULL);
+	}
 
 	if (unpack_unreachable_expiration) {
 		revs->ignore_missing_links = 1;
@@ -4296,6 +4484,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
 	struct list_objects_filter_options filter_options =
 		LIST_OBJECTS_FILTER_INIT;
+	const char *delta_file_name = NULL;
 
 	struct option pack_objects_options[] = {
 		OPT_CALLBACK_F('q', "quiet", &progress, NULL,
@@ -4368,6 +4557,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			 N_("use the sparse reachability algorithm")),
 		OPT_BOOL(0, "thin", &thin,
 			 N_("create thin packs")),
+		OPT_BOOL(0, "path-walk", &path_walk,
+			 N_("use the path-walk API to walk objects when possible")),
 		OPT_BOOL(0, "shallow", &shallow,
 			 N_("create packs suitable for shallow fetches")),
 		OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
@@ -4398,6 +4589,11 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 		OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
 				N_("protocol"),
 				N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
+		OPT_BOOL(0, "full-name-hash", &use_full_name_hash,
+			 N_("optimize delta compression across identical path names over time")),
+		OPT_STRING(0, "delta-file", &delta_file_name,
+				N_("filename"),
+				N_("output delta compression details to the given file")),
 		OPT_END(),
 	};
 
@@ -4406,11 +4602,14 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 
 	disable_replace_refs();
 
+	path_walk = git_env_bool("GIT_TEST_PACK_PATH_WALK", -1);
 	sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1);
 	if (the_repository->gitdir) {
 		prepare_repo_settings(the_repository);
 		if (sparse < 0)
 			sparse = the_repository->settings.pack_use_sparse;
+		if (path_walk < 0)
+			path_walk = the_repository->settings.pack_use_path_walk;
 		if (the_repository->settings.pack_use_multi_pack_reuse)
 			allow_pack_reuse = MULTI_PACK_REUSE;
 	}
@@ -4432,6 +4631,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	if (pack_to_stdout != !base_name || argc)
 		usage_with_options(pack_usage, pack_objects_options);
 
+	if (delta_file_name) {
+		delta_file = fopen(delta_file_name, "w");
+		if (!delta_file)
+			die_errno("failed to open '%s'", delta_file_name);
+		trace2_printf("opened '%s' for writing deltas", delta_file_name);
+	}
 	if (depth < 0)
 		depth = 0;
 	if (depth >= (1 << OE_DEPTH_BITS)) {
@@ -4448,7 +4653,19 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 		window = 0;
 
 	strvec_push(&rp, "pack-objects");
-	if (thin) {
+
+	if (path_walk && filter_options.choice) {
+		warning(_("cannot use --filter with --path-walk"));
+		path_walk = 0;
+	}
+	if (path_walk) {
+		strvec_push(&rp, "--boundary");
+		 /*
+		  * We must disable the bitmaps because we are removing
+		  * the --objects / --objects-edge[-aggressive] options.
+		  */
+		use_bitmap_index = 0;
+	} else if (thin) {
 		use_internal_rev_list = 1;
 		strvec_push(&rp, shallow
 				? "--objects-edge-aggressive"
@@ -4643,5 +4860,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	list_objects_filter_release(&filter_options);
 	strvec_clear(&rp);
 
+	if (delta_file) {
+		fflush(delta_file);
+		fclose(delta_file);
+	}
+
 	return 0;
 }
diff --git a/builtin/repack.c b/builtin/repack.c
index 62cfa50c50f893..a1ab103e62d0ce 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -57,6 +57,8 @@ struct pack_objects_args {
 	int no_reuse_object;
 	int quiet;
 	int local;
+	int path_walk;
+	int full_name_hash;
 	struct list_objects_filter_options filter_options;
 };
 
@@ -288,6 +290,10 @@ static void prepare_pack_objects(struct child_process *cmd,
 		strvec_pushf(&cmd->args, "--no-reuse-delta");
 	if (args->no_reuse_object)
 		strvec_pushf(&cmd->args, "--no-reuse-object");
+	if (args->path_walk)
+		strvec_pushf(&cmd->args, "--path-walk");
+	if (args->full_name_hash)
+		strvec_pushf(&cmd->args, "--full-name-hash");
 	if (args->local)
 		strvec_push(&cmd->args,  "--local");
 	if (args->quiet)
@@ -1158,6 +1164,10 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 				N_("pass --no-reuse-delta to git-pack-objects")),
 		OPT_BOOL('F', NULL, &po_args.no_reuse_object,
 				N_("pass --no-reuse-object to git-pack-objects")),
+		OPT_BOOL(0, "path-walk", &po_args.path_walk,
+				N_("pass --path-walk to git-pack-objects")),
+		OPT_BOOL(0, "full-name-hash", &po_args.full_name_hash,
+				N_("pass --full-name-hash to git-pack-objects")),
 		OPT_NEGBIT('n', NULL, &run_update_server_info,
 				N_("do not run git-update-server-info"), 1),
 		OPT__QUIET(&po_args.quiet, N_("be quiet")),
diff --git a/builtin/survey.c b/builtin/survey.c
new file mode 100644
index 00000000000000..90b041967c887e
--- /dev/null
+++ b/builtin/survey.c
@@ -0,0 +1,965 @@
+#include "builtin.h"
+#include "config.h"
+#include "environment.h"
+#include "hex.h"
+#include "object.h"
+#include "object-name.h"
+#include "object-store-ll.h"
+#include "parse-options.h"
+#include "path-walk.h"
+#include "progress.h"
+#include "ref-filter.h"
+#include "refs.h"
+#include "revision.h"
+#include "strbuf.h"
+#include "strvec.h"
+#include "tag.h"
+#include "trace2.h"
+
+static const char * const survey_usage[] = { /* NULL-terminated usage lines */
+	N_("(EXPERIMENTAL!) git survey <options>"),
+	NULL,
+};
+
+struct survey_refs_wanted { /* each field starts at -1, meaning "not specified" */
+	int want_all_refs; /* special override: scan everything under "refs/" */
+
+	int want_branches; /* "refs/heads/" */
+	int want_tags; /* "refs/tags/" */
+	int want_remotes; /* "refs/remotes/" */
+	int want_detached; /* "HEAD" */
+	int want_other; /* see FILTER_REFS_OTHERS -- refs/notes/, refs/stash/ */
+};
+
+static struct survey_refs_wanted default_ref_options = { /* used when no --<ref-type> is given */
+	.want_all_refs = 1,
+};
+
+enum survey_format {
+	SURVEY_PLAINTEXT = 0, /* rendered by survey_report_plaintext() */
+	SURVEY_JSON = 1, /* rendered by survey_report_json(), currently a stub */
+};
+
+struct survey_opts {
+	int verbose; /* --verbose or survey.verbose */
+	int show_progress; /* --progress or survey.progress; -1 means "use isatty(2)" */
+	struct survey_refs_wanted refs; /* which ref namespaces to scan */
+	enum survey_format format; /* report renderer chosen in cmd_survey() */
+};
+
+struct survey_report_ref_summary {
+	size_t refs_nr; /* every ref loaded, regardless of kind */
+	size_t branches_nr; /* FILTER_REFS_BRANCHES */
+	size_t remote_refs_nr; /* FILTER_REFS_REMOTES */
+	size_t tags_nr; /* FILTER_REFS_TAGS */
+	size_t tags_annotated_nr; /* subset of tags_nr whose ref points at a tag object */
+	size_t others_nr; /* FILTER_REFS_OTHERS */
+	size_t unknown_nr; /* any other ref kind */
+};
+
+struct survey_report_object_summary {
+	size_t commits_nr; /* OBJ_COMMIT objects reported by the path walk */
+	size_t tags_nr; /* tag objects visited while peeling tag refs */
+	size_t trees_nr; /* OBJ_TREE objects reported by the path walk */
+	size_t blobs_nr; /* OBJ_BLOB objects reported by the path walk */
+};
+
+/**
+ * For some category given by 'label', count the number of objects
+ * that match that label along with the on-disk size and the size
+ * after decompressing (both with delta bases and zlib).
+ */
+struct survey_report_object_size_summary {
+	char *label; /* category name shown in the first table column */
+	size_t nr; /* objects whose info lookup succeeded */
+	size_t disk_size; /* sum of their on-disk sizes */
+	size_t inflated_size; /* sum of their inflated sizes */
+	size_t num_missing; /* objects whose info lookup failed */
+};
+
+typedef int (*survey_top_size_cmp)(struct survey_report_object_size_summary *s1,
+				   struct survey_report_object_size_summary *s2); /* <0, 0, >0 as s1 ranks below, equal to, above s2 */
+
+/* Rank two summaries by object count; yields -1, 0 or 1. */
+static int cmp_by_nr(struct survey_report_object_size_summary *s1,
+		     struct survey_report_object_size_summary *s2)
+{
+	size_t lhs = s1->nr, rhs = s2->nr;
+
+	/* Each comparison is 0 or 1, so the difference is -1, 0 or 1. */
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Rank two summaries by total on-disk size; yields -1, 0 or 1. */
+static int cmp_by_disk_size(struct survey_report_object_size_summary *s1,
+			    struct survey_report_object_size_summary *s2)
+{
+	size_t lhs = s1->disk_size, rhs = s2->disk_size;
+
+	/* Each comparison is 0 or 1, so the difference is -1, 0 or 1. */
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Rank two summaries by total inflated size; yields -1, 0 or 1. */
+static int cmp_by_inflated_size(struct survey_report_object_size_summary *s1,
+				struct survey_report_object_size_summary *s2)
+{
+	size_t lhs = s1->inflated_size, rhs = s2->inflated_size;
+
+	/* Each comparison is 0 or 1, so the difference is -1, 0 or 1. */
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/**
+ * Store a list of "top" categories by some sorting function. When
+ * inserting a new category, reorder the list and free the one that
+ * got ejected (if any).
+ */
+struct survey_report_top_sizes {
+	const char *name; /* table title; not freed by clear_top_sizes() */
+	survey_top_size_cmp cmp_fn; /* ordering; higher-ranked entries come first */
+	struct survey_report_object_size_summary *data; /* 'alloc' slots */
+	size_t nr; /* slots of 'data' currently in use */
+	size_t alloc; /* capacity, i.e. the "top N" limit */
+};
+
+/* Set up 'top' as an empty list titled 'name' with room for 'limit' entries. */
+static void init_top_sizes(struct survey_report_top_sizes *top, size_t limit,
+			   const char *name, survey_top_size_cmp cmp)
+{
+	CALLOC_ARRAY(top->data, limit);
+	top->alloc = limit;
+	top->nr = 0;
+	top->cmp_fn = cmp;
+	top->name = name;
+}
+
+MAYBE_UNUSED /* releases every owned label, then the array itself */
+static void clear_top_sizes(struct survey_report_top_sizes *top)
+{
+	for (size_t left = top->nr; left > 0; left--)
+		free(top->data[left - 1].label);
+	free(top->data);
+}
+
+/* Insert a copy of 'summary' (label duplicated) if it ranks within top->alloc. */
+static void maybe_insert_into_top_size(struct survey_report_top_sizes *top,
+				       struct survey_report_object_size_summary *summary)
+{
+	size_t slot = top->nr;
+
+	/* Walk up from the tail past every entry that ranks below 'summary'. */
+	while (slot && top->cmp_fn(&top->data[slot - 1], summary) < 0)
+		slot--;
+
+	if (slot >= top->alloc)
+		return; /* ranks below a list that is already full */
+
+	/* A full list drops its last entry; otherwise it grows by one. */
+	if (top->nr < top->alloc)
+		top->nr++;
+	else
+		free(top->data[top->nr - 1].label);
+
+	/* Open a gap at 'slot' by moving the tail down one position. */
+	memmove(&top->data[slot + 1], &top->data[slot],
+		(top->nr - 1 - slot) * sizeof(*top->data));
+	top->data[slot] = *summary;
+	top->data[slot].label = xstrdup(summary->label);
+}
+
+/**
+ * This struct contains all of the information that needs to be printed
+ * at the end of the exploration of the repository and its references.
+ */
+struct survey_report {
+	struct survey_report_ref_summary refs;
+	struct survey_report_object_summary reachable_objects;
+
+	struct survey_report_object_size_summary *by_type; /* REPORT_TYPE_COUNT entries */
+
+	struct survey_report_top_sizes *top_paths_by_count; /* indexed by REPORT_TYPE_* */
+	struct survey_report_top_sizes *top_paths_by_disk; /* indexed by REPORT_TYPE_* */
+	struct survey_report_top_sizes *top_paths_by_inflate; /* indexed by REPORT_TYPE_* */
+};
+
+#define REPORT_TYPE_COMMIT 0 /* indexes into the per-type report arrays */
+#define REPORT_TYPE_TREE 1
+#define REPORT_TYPE_BLOB 2
+#define REPORT_TYPE_COUNT 3 /* number of REPORT_TYPE_* slots */
+
+struct survey_context {
+	/* Options that control what is done. */
+	struct survey_opts opts;
+
+	/* Info for output only. */
+	struct survey_report report;
+
+	/*
+	 * The rest of the members are about enabling the activity
+	 * of the 'git survey' command, including ref listings, object
+	 * pointers, and progress.
+	 */
+
+	struct repository *repo;
+
+	struct progress *progress; /* only started when opts.show_progress is set */
+	size_t progress_nr; /* units completed in the current phase */
+	size_t progress_total; /* expected units; 0 where the total is not known */
+
+	struct strvec refs; /* ref name patterns handed to the ref filter */
+	struct ref_array ref_array; /* refs loaded by do_load_refs() */
+};
+
+static void clear_survey_context(struct survey_context *ctx)
+{
+	ref_array_clear(&ctx->ref_array); /* refs loaded by do_load_refs() */
+	strvec_clear(&ctx->refs); /* ref patterns; NOTE(review): ctx->report is not released here */
+}
+
+struct survey_table {
+	const char *table_name; /* title printed above the table; not freed by clear_table() */
+	struct strvec header; /* one string per column */
+	struct strvec *rows; /* rows_nr rows, each a strvec of cells */
+	size_t rows_nr;
+	size_t rows_alloc; /* capacity of 'rows', maintained by ALLOC_GROW() */
+};
+
+#define SURVEY_TABLE_INIT {	\
+	.header = STRVEC_INIT,	\
+} /* members not named here start out zero/NULL */
+
+static void clear_table(struct survey_table *table)
+{
+	for (size_t left = table->rows_nr; left > 0; left--)
+		strvec_clear(&table->rows[left - 1]);
+	free(table->rows); /* the counters and the title are left untouched */
+	strvec_clear(&table->header);
+}
+
+/* Append one row; the cells are a NULL-terminated list of strings, each copied. */
+static void insert_table_rowv(struct survey_table *table, ...)
+{
+	va_list ap;
+	char *arg;
+
+	ALLOC_GROW(table->rows, table->rows_nr + 1, table->rows_alloc);
+	strvec_init(&table->rows[table->rows_nr]); /* new slot is uninitialized */
+
+	va_start(ap, table);
+	while ((arg = va_arg(ap, char *)))
+		strvec_push(&table->rows[table->rows_nr], arg);
+	va_end(ap);
+	table->rows_nr++;
+}
+
+/*
+ * Print a blank line, then 'name', then a rule of '=' characters. The
+ * rule spans the table (column widths plus a 3-character separator
+ * between columns) but is never shorter than the title itself.
+ */
+static void print_table_title(const char *name, size_t *widths, size_t nr)
+{
+	static struct strbuf lines = STRBUF_INIT;
+	size_t title_len = strlen(name);
+	size_t rule_len = nr ? 3 * (nr - 1) : 0;
+
+	for (size_t col = 0; col < nr; col++)
+		rule_len += widths[col];
+
+	if (rule_len < title_len)
+		rule_len = title_len;
+
+	/* The static buffer is reused across calls, so start from empty. */
+	strbuf_reset(&lines);
+	strbuf_addf(&lines, "\n%s\n", name);
+	strbuf_addchars(&lines, '=', rule_len);
+
+	printf("%s\n", lines.buf);
+}
+
+static void print_row_plaintext(struct strvec *row, size_t *widths)
+{
+	static struct strbuf line = STRBUF_INIT;
+
+	strbuf_reset(&line); /* static buffer is reused across calls */
+	for (size_t col = 0; col < row->nr; col++) {
+		const char *cell = row->v[col];
+		size_t cell_len = strlen(cell);
+		if (col > 0)
+			strbuf_addstr(&line, " | ");
+		strbuf_addchars(&line, ' ', widths[col] - cell_len); /* right-align */
+		strbuf_addstr(&line, cell);
+	}
+	printf("%s\n", line.buf);
+}
+
+static void print_divider_plaintext(size_t *widths, size_t nr)
+{
+	static struct strbuf line = STRBUF_INIT;
+
+	strbuf_reset(&line);
+	for (size_t col = 0; col < nr; col++) {
+		/* "-+-" lines up under the " | " used between cells. */
+		strbuf_addstr(&line, col ? "-+-" : "");
+		strbuf_addchars(&line, '-', widths[col]);
+	}
+	printf("%s\n", line.buf);
+}
+
+/* Print 'table' to stdout, sizing each column to its widest header or cell. */
+static void print_table_plaintext(struct survey_table *table)
+{
+	size_t *column_widths;
+	size_t columns_nr = table->header.nr;
+
+	CALLOC_ARRAY(column_widths, columns_nr);
+	for (size_t i = 0; i < columns_nr; i++) {
+		column_widths[i] = strlen(table->header.v[i]);
+		for (size_t j = 0; j < table->rows_nr; j++) {
+			size_t rowlen = strlen(table->rows[j].v[i]);
+			if (column_widths[i] < rowlen)
+				column_widths[i] = rowlen;
+		}
+	}
+
+	print_table_title(table->table_name, column_widths, columns_nr);
+	print_row_plaintext(&table->header, column_widths);
+	print_divider_plaintext(column_widths, columns_nr);
+	for (size_t j = 0; j < table->rows_nr; j++)
+		print_row_plaintext(&table->rows[j], column_widths);
+	free(column_widths); /* previously leaked once per printed table */
+}
+
+/* Print per-kind ref counts; they are size_t, hence the uintmax_t casts for PRIuMAX. */
+static void survey_report_plaintext_refs(struct survey_context *ctx)
+{
+	struct survey_report_ref_summary *refs = &ctx->report.refs;
+	struct survey_table table = SURVEY_TABLE_INIT;
+
+	table.table_name = _("REFERENCES SUMMARY");
+	strvec_push(&table.header, _("Ref Type"));
+	strvec_push(&table.header, _("Count"));
+
+	if (ctx->opts.refs.want_all_refs || ctx->opts.refs.want_branches) {
+		char *fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->branches_nr);
+		insert_table_rowv(&table, _("Branches"), fmt, NULL);
+		free(fmt);
+	}
+
+	if (ctx->opts.refs.want_all_refs || ctx->opts.refs.want_remotes) {
+		char *fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->remote_refs_nr);
+		insert_table_rowv(&table, _("Remote refs"), fmt, NULL);
+		free(fmt);
+	}
+
+	if (ctx->opts.refs.want_all_refs || ctx->opts.refs.want_tags) {
+		char *fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->tags_nr);
+		insert_table_rowv(&table, _("Tags (all)"), fmt, NULL);
+		free(fmt);
+		fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->tags_annotated_nr);
+		insert_table_rowv(&table, _("Tags (annotated)"), fmt, NULL);
+		free(fmt);
+	}
+
+	print_table_plaintext(&table);
+	clear_table(&table);
+}
+
+/* Print reachable-object counts per type; size_t is cast to uintmax_t for PRIuMAX. */
+static void survey_report_plaintext_reachable_object_summary(struct survey_context *ctx)
+{
+	struct survey_report_object_summary *objs = &ctx->report.reachable_objects;
+	struct survey_table table = SURVEY_TABLE_INIT;
+	char *fmt;
+
+	table.table_name = _("REACHABLE OBJECT SUMMARY");
+	strvec_push(&table.header, _("Object Type"));
+	strvec_push(&table.header, _("Count"));
+
+	fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)objs->tags_nr);
+	insert_table_rowv(&table, _("Tags"), fmt, NULL);
+	free(fmt);
+
+	fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)objs->commits_nr);
+	insert_table_rowv(&table, _("Commits"), fmt, NULL);
+	free(fmt);
+
+	fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)objs->trees_nr);
+	insert_table_rowv(&table, _("Trees"), fmt, NULL);
+	free(fmt);
+
+	fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)objs->blobs_nr);
+	insert_table_rowv(&table, _("Blobs"), fmt, NULL);
+	free(fmt);
+
+	print_table_plaintext(&table);
+	clear_table(&table);
+}
+
+static void survey_report_object_sizes(const char *title,
+				       const char *categories,
+				       struct survey_report_object_size_summary *summary,
+				       size_t summary_nr)
+{
+	struct survey_table table = SURVEY_TABLE_INIT;
+
+	table.table_name = title; /* strvec copies its input, so nothing is pre-duplicated */
+	strvec_pushl(&table.header, categories, _("Count"),
+		     _("Disk Size"), _("Inflated Size"), NULL);
+
+	for (size_t i = 0; i < summary_nr; i++) {
+		char *nr = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].nr);
+		char *disk = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].disk_size);
+		char *inflated = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].inflated_size);
+
+		insert_table_rowv(&table, summary[i].label, nr, disk, inflated, NULL);
+		free(nr); /* the row holds its own copies of every cell */
+		free(disk);
+		free(inflated);
+	}
+	print_table_plaintext(&table);
+	clear_table(&table);
+}
+
+/* Print one "top paths" list as a table whose first column is titled "Path". */
+static void survey_report_plaintext_sorted_size(
+		struct survey_report_top_sizes *top)
+{
+	survey_report_object_sizes(top->name, _("Path"), top->data, top->nr);
+}
+
+static void survey_report_plaintext(struct survey_context *ctx)
+{
+	/* A bare repository has no worktree; fall back to its git directory. */
+	printf("GIT SURVEY for \"%s\"\n", ctx->repo->worktree ?
+	       ctx->repo->worktree : ctx->repo->gitdir);
+	printf("-----------------------------------------------------\n");
+	survey_report_plaintext_refs(ctx);
+	survey_report_plaintext_reachable_object_summary(ctx);
+	survey_report_object_sizes(_("TOTAL OBJECT SIZES BY TYPE"),
+				   _("Object Type"), ctx->report.by_type,
+				   REPORT_TYPE_COUNT);
+	survey_report_plaintext_sorted_size(
+		&ctx->report.top_paths_by_count[REPORT_TYPE_TREE]);
+	survey_report_plaintext_sorted_size(
+		&ctx->report.top_paths_by_count[REPORT_TYPE_BLOB]);
+
+	survey_report_plaintext_sorted_size(
+		&ctx->report.top_paths_by_disk[REPORT_TYPE_TREE]);
+	survey_report_plaintext_sorted_size(
+		&ctx->report.top_paths_by_disk[REPORT_TYPE_BLOB]);
+
+	survey_report_plaintext_sorted_size(
+		&ctx->report.top_paths_by_inflate[REPORT_TYPE_TREE]);
+	survey_report_plaintext_sorted_size(
+		&ctx->report.top_paths_by_inflate[REPORT_TYPE_BLOB]);
+}
+
+static void survey_report_json(struct survey_context *ctx)
+{
+	/* TODO: JSON output is not implemented yet; nothing is printed. */
+}
+
+/*
+ * After parsing the command line arguments, figure out which refs we
+ * should scan.
+ *
+ * If ANY were given in positive sense, then we ONLY include them and
+ * do not use the builtin values.
+ */
+static void fixup_refs_wanted(struct survey_context *ctx)
+{
+	struct survey_refs_wanted *rw = &ctx->opts.refs;
+
+	/* `--all-refs` overrides and enables everything. */
+	if (rw->want_all_refs == 1) {
+		rw->want_branches = 1;
+		rw->want_tags = 1;
+		rw->want_remotes = 1;
+		rw->want_detached = 1;
+		rw->want_other = 1;
+		return;
+	}
+
+	/*
+	 * If none of the `--<ref-type>` were given, we assume all
+	 * of the builtin unspecified values.
+	 */
+	if (rw->want_branches == -1 &&
+	    rw->want_tags == -1 &&
+	    rw->want_remotes == -1 &&
+	    rw->want_detached == -1 &&
+	    rw->want_other == -1) {
+		*rw = default_ref_options;
+		return;
+	}
+
+	/*
+	 * Since we only allow positive boolean values on the command
+	 * line, we will only have true values where they specified
+	 * a `--<ref-type>`. Anything still unspecified becomes false.
+	 *
+	 * That includes `want_all_refs`: it is tested for truth when
+	 * loading refs, so a leftover -1 would select everything.
+	 */
+	if (rw->want_all_refs == -1)
+		rw->want_all_refs = 0;
+	if (rw->want_branches == -1)
+		rw->want_branches = 0;
+	if (rw->want_tags == -1)
+		rw->want_tags = 0;
+	if (rw->want_remotes == -1)
+		rw->want_remotes = 0;
+	if (rw->want_detached == -1)
+		rw->want_detached = 0;
+	if (rw->want_other == -1)
+		rw->want_other = 0;
+}
+
+static int survey_load_config_cb(const char *var, const char *value,
+				 const struct config_context *cctx, void *pvoid)
+{
+	struct survey_context *sctx = pvoid;
+	int *dest = NULL; /* option field for a survey.* key, if 'var' is one */
+
+	if (!strcmp(var, "survey.verbose"))
+		dest = &sctx->opts.verbose;
+	else if (!strcmp(var, "survey.progress"))
+		dest = &sctx->opts.show_progress;
+	if (!dest) /* not one of ours: defer to the default handler */
+		return git_default_config(var, value, cctx, pvoid);
+	*dest = git_config_bool(var, value);
+	return 0;
+}
+
+static void survey_load_config(struct survey_context *ctx)
+{
+	git_config(survey_load_config_cb, ctx); /* applies survey.verbose and survey.progress */
+}
+
+static void do_load_refs(struct survey_context *ctx,
+			 struct ref_array *ref_array)
+{
+	struct ref_filter filter = REF_FILTER_INIT;
+	struct ref_sorting *sorting;
+	struct string_list sorting_options = STRING_LIST_INIT_DUP;
+
+	string_list_append(&sorting_options, "objectname"); /* sort key for the loaded refs */
+	sorting = ref_sorting_options(&sorting_options);
+
+	if (ctx->opts.refs.want_detached)
+		strvec_push(&ctx->refs, "HEAD");
+
+	if (ctx->opts.refs.want_all_refs) { /* any nonzero value selects all of refs/ */
+		strvec_push(&ctx->refs, "refs/");
+	} else {
+		if (ctx->opts.refs.want_branches)
+			strvec_push(&ctx->refs, "refs/heads/");
+		if (ctx->opts.refs.want_tags)
+			strvec_push(&ctx->refs, "refs/tags/");
+		if (ctx->opts.refs.want_remotes)
+			strvec_push(&ctx->refs, "refs/remotes/");
+		if (ctx->opts.refs.want_other) {
+			strvec_push(&ctx->refs, "refs/notes/");
+			strvec_push(&ctx->refs, "refs/stash/");
+		}
+	}
+
+	filter.name_patterns = ctx->refs.v; /* borrowed; ctx->refs keeps ownership */
+	filter.ignore_case = 0;
+	filter.match_as_path = 1;
+
+	if (ctx->opts.show_progress) {
+		ctx->progress_total = 0;
+		ctx->progress = start_progress(_("Scanning refs..."), 0);
+	}
+
+	filter_refs(ref_array, &filter, FILTER_REFS_KIND_MASK); /* fills 'ref_array' */
+
+	if (ctx->opts.show_progress) {
+		ctx->progress_total = ref_array->nr;
+		display_progress(ctx->progress, ctx->progress_total);
+	}
+
+	ref_array_sort(sorting, ref_array);
+
+	stop_progress(&ctx->progress); /* called even when no progress was started */
+	ref_filter_clear(&filter);
+	ref_sorting_release(sorting);
+}
+
+/*
+ * The REFS phase:
+ *
+ * Load the set of requested refs and assess them for scalability problems.
+ * Use that set to start a treewalk to all reachable objects and assess
+ * them.
+ *
+ * This data will give us insights into the repository itself (the number
+ * of refs, the size and shape of the DAG, the number and size of the
+ * objects).
+ *
+ * Theoretically, this data is independent of the on-disk representation
+ * (e.g. independent of packing concerns).
+ */
+static void survey_phase_refs(struct survey_context *ctx)
+{
+	trace2_region_enter("survey", "phase/refs", ctx->repo);
+	do_load_refs(ctx, &ctx->ref_array);
+
+	ctx->report.refs.refs_nr = ctx->ref_array.nr;
+	for (size_t i = 0; i < ctx->ref_array.nr; i++) {
+		unsigned long size; /* oid_object_info() takes unsigned long *, not size_t * */
+		struct ref_array_item *item = ctx->ref_array.items[i];
+
+		switch (item->kind) {
+		case FILTER_REFS_TAGS:
+			ctx->report.refs.tags_nr++;
+			if (oid_object_info(ctx->repo,
+					    &item->objectname,
+					    &size) == OBJ_TAG)
+				ctx->report.refs.tags_annotated_nr++; /* ref points at a tag object */
+			break;
+
+		case FILTER_REFS_BRANCHES:
+			ctx->report.refs.branches_nr++;
+			break;
+
+		case FILTER_REFS_REMOTES:
+			ctx->report.refs.remote_refs_nr++;
+			break;
+
+		case FILTER_REFS_OTHERS:
+			ctx->report.refs.others_nr++;
+			break;
+
+		default:
+			ctx->report.refs.unknown_nr++;
+			break;
+		}
+	}
+
+	trace2_region_leave("survey", "phase/refs", ctx->repo);
+}
+
+/*
+ * Add 'nr' to the counter that matches 'type'. Only commits, trees
+ * and blobs are tallied here; any other type is silently ignored.
+ */
+static void increment_object_counts(
+		struct survey_report_object_summary *summary,
+		enum object_type type,
+		size_t nr)
+{
+	size_t *counter;
+
+	if (type == OBJ_COMMIT)
+		counter = &summary->commits_nr;
+	else if (type == OBJ_TREE)
+		counter = &summary->trees_nr;
+	else if (type == OBJ_BLOB)
+		counter = &summary->blobs_nr;
+	else
+		return;
+
+	*counter += nr;
+}
+
+/* Add size information for each of 'oids' to 'summary', counting failed lookups. */
+static void increment_totals(struct survey_context *ctx,
+			     struct oid_array *oids,
+			     struct survey_report_object_size_summary *summary)
+{
+	for (size_t i = 0; i < oids->nr; i++) {
+		struct object_info oi = OBJECT_INFO_INIT;
+		enum object_type type;
+		unsigned long inflated = 0;
+		off_t on_disk = 0;
+
+		oi.typep = &type;
+		oi.sizep = &inflated;
+		oi.disk_sizep = &on_disk;
+
+		if (oid_object_info_extended(ctx->repo, &oids->oid[i], &oi,
+					     OBJECT_INFO_FOR_PREFETCH) < 0) {
+			summary->num_missing++;
+			continue;
+		}
+		summary->nr++;
+		summary->disk_size += on_disk;
+		summary->inflated_size += inflated;
+	}
+}
+
+/*
+ * Measure 'oids' and add the result to the totals kept for 'type'.
+ * Trees and blobs are additionally offered, labelled with 'path', to
+ * the three "top paths" lists for their type. Any type other than
+ * commit, tree or blob is a BUG().
+ */
+static void increment_object_totals(struct survey_context *ctx,
+				    struct oid_array *oids,
+				    enum object_type type,
+				    const char *path)
+{
+	struct survey_report *report = &ctx->report;
+	struct survey_report_object_size_summary batch = { 0 };
+	struct survey_report_object_size_summary *total;
+	int index;
+
+	/* Sizes are gathered before 'type' is validated below. */
+	increment_totals(ctx, oids, &batch);
+
+	/* Map the object type onto its REPORT_TYPE_* slot. */
+	if (type == OBJ_COMMIT)
+		index = REPORT_TYPE_COMMIT;
+	else if (type == OBJ_TREE)
+		index = REPORT_TYPE_TREE;
+	else if (type == OBJ_BLOB)
+		index = REPORT_TYPE_BLOB;
+	else
+		BUG("No other type allowed");
+
+	total = &report->by_type[index];
+	total->nr += batch.nr;
+	total->disk_size += batch.disk_size;
+	total->inflated_size += batch.inflated_size;
+	total->num_missing += batch.num_missing;
+
+	/* Only trees and blobs take part in the per-path rankings. */
+	if (type != OBJ_TREE && type != OBJ_BLOB)
+		return;
+
+	/*
+	 * Borrow 'path' as the label without copying it: each list
+	 * duplicates the label if it keeps the entry, and 'batch'
+	 * itself is never freed.
+	 */
+	batch.label = (char *)path;
+
+	maybe_insert_into_top_size(&report->top_paths_by_count[index],
+				   &batch);
+	maybe_insert_into_top_size(&report->top_paths_by_disk[index],
+				   &batch);
+	maybe_insert_into_top_size(&report->top_paths_by_inflate[index],
+				   &batch);
+}
+
+/* path_fn callback: account for 'oids' found at 'path' and advance the progress meter. */
+static int survey_objects_path_walk_fn(const char *path,
+				       struct oid_array *oids,
+				       enum object_type type,
+				       void *data)
+{
+	struct survey_context *ctx = data;
+	size_t batch = oids->nr;
+
+	increment_object_counts(&ctx->report.reachable_objects, type, batch);
+	increment_object_totals(ctx, oids, type, path);
+
+	ctx->progress_nr += batch;
+	display_progress(ctx->progress, ctx->progress_nr);
+	return 0;
+}
+
+static int iterate_tag_chain(struct survey_context *ctx,
+			     struct object_id *oid,
+			     struct object_id *peeled) /* out: object at the end of the chain */
+{
+	struct object *o = lookup_unknown_object(ctx->repo, oid); /* NOTE(review): o->type is read without parsing here; confirm it is set by now */
+	struct tag *t;
+
+	if (o->type != OBJ_TAG) {
+		oidcpy(peeled, &o->oid);
+		return o->type != OBJ_COMMIT; /* 0 for a commit, 1 for anything else */
+	}
+
+	t = lookup_tag(ctx->repo, oid);
+	while (t) {
+		parse_tag(t); /* return value is not checked */
+		ctx->report.reachable_objects.tags_nr++; /* every tag in the chain is counted */
+
+		if (!t->tagged)
+			break;
+
+		o = lookup_unknown_object(ctx->repo, &t->tagged->oid);
+		if (o && o->type == OBJ_TAG)
+			t = lookup_tag(ctx->repo, &t->tagged->oid); /* tag of a tag: keep peeling */
+		else
+			break;
+	}
+
+	if (!t || !t->tagged)
+		return -1;
+
+	oidcpy(peeled, &t->tagged->oid);
+	o = lookup_unknown_object(ctx->repo, peeled);
+	if (o && o->type == OBJ_COMMIT)
+		return 0; /* only a chain ending at a commit counts as success */
+	return -1;
+}
+
+static void initialize_report(struct survey_context *ctx)
+{
+	const int top_limit = 100; /* capacity of each "top paths" list */
+
+	CALLOC_ARRAY(ctx->report.by_type, REPORT_TYPE_COUNT);
+	ctx->report.by_type[REPORT_TYPE_COMMIT].label = xstrdup(_("Commits"));
+	ctx->report.by_type[REPORT_TYPE_TREE].label = xstrdup(_("Trees"));
+	ctx->report.by_type[REPORT_TYPE_BLOB].label = xstrdup(_("Blobs"));
+
+	CALLOC_ARRAY(ctx->report.top_paths_by_count, REPORT_TYPE_COUNT); /* the REPORT_TYPE_COMMIT slot stays zeroed */
+	init_top_sizes(&ctx->report.top_paths_by_count[REPORT_TYPE_TREE],
+		       top_limit, _("TOP DIRECTORIES BY COUNT"), cmp_by_nr);
+	init_top_sizes(&ctx->report.top_paths_by_count[REPORT_TYPE_BLOB],
+		       top_limit, _("TOP FILES BY COUNT"), cmp_by_nr);
+
+	CALLOC_ARRAY(ctx->report.top_paths_by_disk, REPORT_TYPE_COUNT); /* likewise, trees and blobs only */
+	init_top_sizes(&ctx->report.top_paths_by_disk[REPORT_TYPE_TREE],
+		       top_limit, _("TOP DIRECTORIES BY DISK SIZE"), cmp_by_disk_size);
+	init_top_sizes(&ctx->report.top_paths_by_disk[REPORT_TYPE_BLOB],
+		       top_limit, _("TOP FILES BY DISK SIZE"), cmp_by_disk_size);
+
+	CALLOC_ARRAY(ctx->report.top_paths_by_inflate, REPORT_TYPE_COUNT); /* likewise, trees and blobs only */
+	init_top_sizes(&ctx->report.top_paths_by_inflate[REPORT_TYPE_TREE],
+		       top_limit, _("TOP DIRECTORIES BY INFLATED SIZE"), cmp_by_inflated_size);
+	init_top_sizes(&ctx->report.top_paths_by_inflate[REPORT_TYPE_BLOB],
+		       top_limit, _("TOP FILES BY INFLATED SIZE"), cmp_by_inflated_size);
+}
+
+/*
+ * The OBJECTS phase: queue the tip of every loaded ref (tags peeled
+ * to their commit) and hand the result to walk_objects_by_path(),
+ * which reports objects to survey_objects_path_walk_fn().
+ */
+static void survey_phase_objects(struct survey_context *ctx)
+{
+	struct rev_info revs = REV_INFO_INIT;
+	struct path_walk_info info = PATH_WALK_INFO_INIT;
+	unsigned int add_flags = 0;
+
+	trace2_region_enter("survey", "phase/objects", ctx->repo);
+
+	info.revs = &revs;
+	info.path_fn = survey_objects_path_walk_fn;
+	info.path_fn_data = ctx;
+	info.commits = 1;
+	info.trees = 1;
+	info.blobs = 1;
+	info.tags = 1;
+
+	initialize_report(ctx);
+	repo_init_revisions(ctx->repo, &revs, "");
+
+	ctx->progress_nr = 0;
+	ctx->progress_total = ctx->ref_array.nr;
+	if (ctx->opts.show_progress)
+		ctx->progress = start_progress(_("Preparing object walk"),
+					       ctx->progress_total);
+	for (size_t i = 0; i < ctx->ref_array.nr; i++) {
+		struct ref_array_item *item = ctx->ref_array.items[i];
+		const struct object_id *tip = &item->objectname;
+		struct object_id peeled;
+
+		switch (item->kind) {
+		case FILTER_REFS_TAGS:
+			/* Tags are peeled; skip those not ending at a commit. */
+			if (iterate_tag_chain(ctx, &item->objectname, &peeled))
+				tip = NULL;
+			else
+				tip = &peeled;
+			break;
+		case FILTER_REFS_BRANCHES:
+		case FILTER_REFS_REMOTES:
+		case FILTER_REFS_OTHERS: /* note, stash, or custom namespace */
+		case FILTER_REFS_DETACHED_HEAD:
+			break;
+		default:
+			tip = NULL;
+			break;
+		}
+		if (tip)
+			add_pending_oid(&revs, NULL, tip, add_flags);
+
+		display_progress(ctx->progress, ++(ctx->progress_nr));
+	}
+	stop_progress(&ctx->progress);
+
+	ctx->progress_nr = 0;
+	ctx->progress_total = 0;
+	if (ctx->opts.show_progress)
+		ctx->progress = start_progress(_("Walking objects"), 0);
+	walk_objects_by_path(&info);
+	stop_progress(&ctx->progress);
+
+	release_revisions(&revs);
+	trace2_region_leave("survey", "phase/objects", ctx->repo);
+}
+
+int cmd_survey(int argc, const char **argv, const char *prefix)
+{
+	static struct survey_context ctx = {
+		.opts = {
+			.verbose = 0,
+			.show_progress = -1, /* defaults to isatty(2) */
+
+			.refs.want_all_refs = -1,
+
+			.refs.want_branches = -1, /* default these to undefined */
+			.refs.want_tags = -1,
+			.refs.want_remotes = -1,
+			.refs.want_detached = -1,
+			.refs.want_other = -1,
+		},
+		.refs = STRVEC_INIT,
+	};
+
+	static struct option survey_options[] = {
+		OPT__VERBOSE(&ctx.opts.verbose, N_("verbose output")),
+		OPT_BOOL(0, "progress", &ctx.opts.show_progress, N_("show progress")),
+
+		OPT_BOOL_F(0, "all-refs", &ctx.opts.refs.want_all_refs, N_("include all refs"),          PARSE_OPT_NONEG),
+
+		OPT_BOOL_F(0, "branches", &ctx.opts.refs.want_branches, N_("include branches"),          PARSE_OPT_NONEG),
+		OPT_BOOL_F(0, "tags",     &ctx.opts.refs.want_tags,     N_("include tags"),              PARSE_OPT_NONEG),
+		OPT_BOOL_F(0, "remotes",  &ctx.opts.refs.want_remotes,  N_("include all remotes refs"),  PARSE_OPT_NONEG),
+		OPT_BOOL_F(0, "detached", &ctx.opts.refs.want_detached, N_("include detached HEAD"),     PARSE_OPT_NONEG),
+		OPT_BOOL_F(0, "other",    &ctx.opts.refs.want_other,    N_("include notes and stashes"), PARSE_OPT_NONEG),
+
+		OPT_END(),
+	};
+
+	if (argc == 2 && !strcmp(argv[1], "-h"))
+		usage_with_options(survey_usage, survey_options);
+
+	ctx.repo = the_repository;
+	prepare_repo_settings(ctx.repo);
+	survey_load_config(&ctx); /* config is read first, so command-line options override it */
+
+	argc = parse_options(argc, argv, prefix, survey_options, survey_usage, 0);
+
+	if (ctx.opts.show_progress < 0) /* neither config nor command line decided */
+		ctx.opts.show_progress = isatty(2);
+	fixup_refs_wanted(&ctx);
+
+	survey_phase_refs(&ctx);
+
+	survey_phase_objects(&ctx);
+
+	switch (ctx.opts.format) { /* no option above sets this, so it keeps its zero value */
+	case SURVEY_PLAINTEXT:
+		survey_report_plaintext(&ctx);
+		break;
+
+	case SURVEY_JSON:
+		survey_report_json(&ctx);
+		break;
+
+	default:
+		BUG("Undefined format");
+	}
+
+	clear_survey_context(&ctx);
+	return 0;
+}
diff --git a/command-list.txt b/command-list.txt
index e0bb87b3b5c278..ecc9d2281a0909 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -60,6 +60,7 @@ git-annotate                            ancillaryinterrogators
 git-apply                               plumbingmanipulators            complete
 git-archimport                          foreignscminterface
 git-archive                             mainporcelain
+git-backfill                            mainporcelain           history
 git-bisect                              mainporcelain           info
 git-blame                               ancillaryinterrogators          complete
 git-branch                              mainporcelain           history
@@ -186,6 +187,7 @@ git-stash                               mainporcelain
 git-status                              mainporcelain           info
 git-stripspace                          purehelpers
 git-submodule                           mainporcelain
+git-survey                              mainporcelain
 git-svn                                 foreignscminterface
 git-switch                              mainporcelain           history
 git-symbolic-ref                        plumbingmanipulators
diff --git a/git.c b/git.c
index 9a618a2740f195..98e90838e42fbf 100644
--- a/git.c
+++ b/git.c
@@ -509,6 +509,7 @@ static struct cmd_struct commands[] = {
 	{ "annotate", cmd_annotate, RUN_SETUP },
 	{ "apply", cmd_apply, RUN_SETUP_GENTLY },
 	{ "archive", cmd_archive, RUN_SETUP_GENTLY },
+	{ "backfill", cmd_backfill, RUN_SETUP },
 	{ "bisect", cmd_bisect, RUN_SETUP },
 	{ "blame", cmd_blame, RUN_SETUP },
 	{ "branch", cmd_branch, RUN_SETUP | DELAY_PAGER_CONFIG },
@@ -629,6 +630,7 @@ static struct cmd_struct commands[] = {
 	{ "status", cmd_status, RUN_SETUP | NEED_WORK_TREE },
 	{ "stripspace", cmd_stripspace },
 	{ "submodule--helper", cmd_submodule__helper, RUN_SETUP },
+	{ "survey", cmd_survey, RUN_SETUP },
 	{ "switch", cmd_switch, RUN_SETUP | NEED_WORK_TREE },
 	{ "symbolic-ref", cmd_symbolic_ref, RUN_SETUP },
 	{ "tag", cmd_tag, RUN_SETUP | DELAY_PAGER_CONFIG },
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64b8b4..50097552d03f20 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -207,6 +207,26 @@ static inline uint32_t pack_name_hash(const char *name)
 	return hash;
 }
 
+/* Hash all of 'name' into 32 bits; a NULL name hashes to 0. */
+static inline uint32_t pack_full_name_hash(const char *name)
+{
+	const uint32_t bigp = 1234572167U;
+	uint32_t c, hash = bigp;
+
+	if (!name)
+		return 0;
+
+	/*
+	 * Per character: add a multiple of a large prime, then rotate
+	 * right by five bits. Aims for uniformity, not cryptography.
+	 */
+	for (c = *name; c != 0; c = *++name) {
+		hash += c * bigp;
+		hash = (hash >> 5) | (hash << 27);
+	}
+	return hash;
+}
+
 static inline enum object_type oe_type(const struct object_entry *e)
 {
 	return e->type_valid ? e->type_ : OBJ_BAD;
diff --git a/path-walk.c b/path-walk.c
new file mode 100644
index 00000000000000..9391e0579aea4b
--- /dev/null
+++ b/path-walk.c
@@ -0,0 +1,401 @@
+/*
+ * path-walk.c: implementation for path-based walks of the object graph.
+ */
+#include "git-compat-util.h"
+#include "path-walk.h"
+#include "blob.h"
+#include "commit.h"
+#include "dir.h"
+#include "hashmap.h"
+#include "hex.h"
+#include "object.h"
+#include "oid-array.h"
+#include "repository.h"
+#include "revision.h"
+#include "string-list.h"
+#include "strmap.h"
+#include "tag.h"
+#include "trace2.h"
+#include "tree.h"
+#include "tree-walk.h"
+
+struct type_and_oid_list
+{
+	enum object_type type;
+	struct oid_array oids;
+	int maybe_interesting;
+};
+
+#define TYPE_AND_OID_LIST_INIT { \
+	.type = OBJ_NONE, 	 \
+	.oids = OID_ARRAY_INIT	 \
+}
+
+struct path_walk_context {
+	/**
+	 * Repeats of data in 'struct path_walk_info' for
+	 * access with fewer characters.
+	 */
+	struct repository *repo;
+	struct rev_info *revs;
+	struct path_walk_info *info;
+
+	/**
+	 * Map a path to a 'struct type_and_oid_list'
+	 * containing the objects discovered at that
+	 * path.
+	 */
+	struct strmap paths_to_lists;
+
+	/**
+	 * Store the current list of paths in a stack, to
+	 * facilitate depth-first-search without recursion.
+	 */
+	struct string_list path_stack;
+};
+
+/*
+ * Queue unseen children of tree 'oid' under 'base_path'; -1 on failed lookup.
+ */
+static int add_children(struct path_walk_context *ctx,
+			const char *base_path,
+			struct object_id *oid)
+{
+	struct tree_desc desc;
+	struct name_entry entry;
+	struct strbuf path = STRBUF_INIT;
+	size_t base_len;
+	struct tree *tree = lookup_tree(ctx->repo, oid);
+
+	if (!tree) {
+		error(_("failed to walk children of tree %s: not found"),
+		      oid_to_hex(oid));
+		return -1;
+	}
+
+	strbuf_addstr(&path, base_path);
+	base_len = path.len;
+
+	parse_tree(tree);
+	init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size);
+	while (tree_entry(&desc, &entry)) {
+		struct type_and_oid_list *list;
+		struct object *o;
+		/* Not actually true, but we will ignore submodules later. */
+		enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB;
+
+		/* Skip submodules. */
+		if (S_ISGITLINK(entry.mode))
+			continue;
+
+		/* If the caller doesn't want blobs, then don't bother. */
+		if (!ctx->info->blobs && type == OBJ_BLOB)
+			continue;
+
+		if (type == OBJ_TREE) {
+			struct tree *child = lookup_tree(ctx->repo, &entry.oid);
+			o = child ? &child->object : NULL;
+		} else {
+			struct blob *child = lookup_blob(ctx->repo, &entry.oid);
+			o = child ? &child->object : NULL;
+		}
+
+		if (!o) /* report error?*/
+			continue;
+
+		/* Skip this object if already seen. */
+		if (o->flags & SEEN)
+			continue;
+		o->flags |= SEEN;
+
+		strbuf_setlen(&path, base_len);
+		strbuf_add(&path, entry.path, entry.pathlen);
+
+		/*
+		 * Trees will end with "/" for concatenation and distinction
+		 * from blobs at the same path.
+		 */
+		if (type == OBJ_TREE)
+			strbuf_addch(&path, '/');
+
+		if (ctx->info->pl) {
+			int dtype;
+			enum pattern_match_result match;
+			match = path_matches_pattern_list(path.buf, path.len,
+							  path.buf + base_len, &dtype,
+							  ctx->info->pl,
+							  ctx->repo->index);
+
+			if (ctx->info->pl->use_cone_patterns &&
+			    match == NOT_MATCHED)
+				continue;
+			else if (!ctx->info->pl->use_cone_patterns &&
+				 type == OBJ_BLOB &&
+				 match != MATCHED)
+				continue;
+		}
+
+		if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) {
+			CALLOC_ARRAY(list, 1);
+			list->type = type;
+			strmap_put(&ctx->paths_to_lists, path.buf, list);
+			string_list_append(&ctx->path_stack, path.buf);
+		}
+		/* Any interesting object, not just the first, keeps the path. */
+		if (!(o->flags & UNINTERESTING))
+			list->maybe_interesting = 1;
+		oid_array_append(&list->oids, &entry.oid);
+	}
+
+	free_tree_buffer(tree);
+	strbuf_release(&path);
+	return 0;
+}
+
+/*
+ * Visit the objects queued at 'path': hand them to path_fn if their type
+ * was requested and expand trees into their children. A path whose objects
+ * are all UNINTERESTING is skipped under prune_all_uninteresting.
+ */
+static int walk_path(struct path_walk_context *ctx,
+		     const char *path)
+{
+	struct type_and_oid_list *list;
+	int ret = 0;
+
+	list = strmap_get(&ctx->paths_to_lists, path);
+
+	if (ctx->info->prune_all_uninteresting) {
+		/*
+		 * This is true if all objects were UNINTERESTING
+		 * when added to the list.
+		 */
+		if (!list->maybe_interesting)
+			return 0;
+
+		/*
+		 * But it's still possible that the objects were set
+		 * as UNINTERESTING after being added. Do a quick check.
+		 */
+		list->maybe_interesting = 0;
+		for (size_t i = 0;
+		     !list->maybe_interesting && i < list->oids.nr;
+		     i++) {
+			if (list->type == OBJ_TREE) {
+				struct tree *t = lookup_tree(ctx->repo,
+							     &list->oids.oid[i]);
+				if (t && !(t->object.flags & UNINTERESTING))
+					list->maybe_interesting = 1;
+			} else {
+				struct blob *b = lookup_blob(ctx->repo,
+							     &list->oids.oid[i]);
+				if (b && !(b->object.flags & UNINTERESTING))
+					list->maybe_interesting = 1;
+			}
+		}
+
+		/* We have confirmed that all objects are UNINTERESTING. */
+		if (!list->maybe_interesting)
+			return 0;
+	}
+
+	/* Evaluate function pointer on this data, if requested. */
+	if ((list->type == OBJ_TREE && ctx->info->trees) ||
+	    (list->type == OBJ_BLOB && ctx->info->blobs))
+		ret = ctx->info->path_fn(path, &list->oids, list->type,
+					ctx->info->path_fn_data);
+
+	/* Expand data for children. */
+	if (list->type == OBJ_TREE) {
+		for (size_t i = 0; i < list->oids.nr; i++) {
+			ret |= add_children(ctx,
+					    path,
+					    &list->oids.oid[i]);
+		}
+	}
+
+	oid_array_clear(&list->oids);
+	strmap_remove(&ctx->paths_to_lists, path, 1);
+	return ret;
+}
+
+static void clear_strmap(struct strmap *map)
+{
+	struct hashmap_iter iter;
+	struct strmap_entry *e;
+
+	hashmap_for_each_entry(&map->map, &iter, e, ent) {
+		struct type_and_oid_list *list = e->value;
+		oid_array_clear(&list->oids);
+	}
+	strmap_clear(map, 1);
+	strmap_init(map);
+}
+
+/**
+ * Given the configuration of 'info', walk the commits based on 'info->revs' and
+ * call 'info->path_fn' on each discovered path.
+ *
+ * Returns nonzero on an error, including one reported by 'info->path_fn'.
+ */
+int walk_objects_by_path(struct path_walk_info *info)
+{
+	const char *root_path = "";
+	int ret = 0, has_uninteresting = 0;
+	size_t commits_nr = 0, paths_nr = 0;
+	struct commit *c;
+	struct type_and_oid_list *root_tree_list;
+	struct type_and_oid_list *commit_list;
+	struct path_walk_context ctx = {
+		.repo = info->revs->repo,
+		.revs = info->revs,
+		.info = info,
+		.path_stack = STRING_LIST_INIT_DUP,
+		.paths_to_lists = STRMAP_INIT
+	};
+	struct oidset root_tree_set = OIDSET_INIT;
+
+	struct oid_array tagged_tree_list = OID_ARRAY_INIT;
+	struct oid_array tagged_blob_list = OID_ARRAY_INIT;
+
+	trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
+
+	CALLOC_ARRAY(commit_list, 1);
+	commit_list->type = OBJ_COMMIT;
+
+	/* Insert a single list for the root tree into the paths. */
+	CALLOC_ARRAY(root_tree_list, 1);
+	root_tree_list->type = OBJ_TREE;
+	root_tree_list->maybe_interesting = 1;
+	strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
+
+	if (prepare_revision_walk(info->revs))
+		die(_("failed to setup revision walk"));
+
+	while ((c = get_revision(info->revs))) {
+		struct object_id *oid;
+		struct tree *t;
+		commits_nr++;
+
+		if (info->commits)
+			oid_array_append(&commit_list->oids,
+					 &c->object.oid);
+
+		/* If we only care about commits, then skip trees. */
+		if (!info->trees && !info->blobs)
+			continue;
+
+		oid = get_commit_tree_oid(c);
+		t = lookup_tree(info->revs->repo, oid);
+
+		if (t) {
+			oidset_insert(&root_tree_set, oid);
+			oid_array_append(&root_tree_list->oids, oid);
+		} else {
+			warning("could not find tree %s", oid_to_hex(oid));
+		}
+
+		if (t && (c->object.flags & UNINTERESTING)) {
+			t->object.flags |= UNINTERESTING;
+			has_uninteresting = 1;
+		}
+	}
+
+	trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
+	trace2_region_leave("path-walk", "commit-walk", info->revs->repo);
+
+	/* Track all commits. */
+	if (info->commits)
+		ret = info->path_fn("initial", &commit_list->oids, OBJ_COMMIT,
+				    info->path_fn_data);
+	oid_array_clear(&commit_list->oids);
+	free(commit_list);
+
+	if (info->tags) {
+		struct oid_array tags = OID_ARRAY_INIT;
+
+		trace2_region_enter("path-walk", "tag-walk", info->revs->repo);
+
+		/*
+		 * Walk any pending objects at this point, but they should only
+		 * be tags.
+		 */
+		for (size_t i = 0; i < info->revs->pending.nr; i++) {
+			struct object_array_entry *pending = info->revs->pending.objects + i;
+			struct object *obj = pending->item;
+
+			while (obj->type == OBJ_TAG) {
+				struct tag *tag = lookup_tag(info->revs->repo,
+							     &obj->oid);
+				oid_array_append(&tags, &obj->oid);
+				obj = tag->tagged;
+			}
+
+			switch (obj->type) {
+			case OBJ_TREE:
+				oid_array_append(&tagged_tree_list, &obj->oid);
+				break;
+
+			case OBJ_BLOB:
+				oid_array_append(&tagged_blob_list, &obj->oid);
+				break;
+
+			case OBJ_COMMIT:
+				/* skip */
+				break;
+
+			default:
+				BUG("should not see any other type here");
+			}
+		}
+
+		ret |= info->path_fn("initial", &tags, OBJ_TAG,
+				     info->path_fn_data);
+		if (tagged_tree_list.nr)
+			ret |= info->path_fn("tagged-trees", &tagged_tree_list,
+					     OBJ_TREE, info->path_fn_data);
+		if (tagged_blob_list.nr)
+			ret |= info->path_fn("tagged-blobs", &tagged_blob_list,
+					     OBJ_BLOB, info->path_fn_data);
+
+		trace2_data_intmax("path-walk", ctx.repo, "tags", tags.nr);
+		trace2_region_leave("path-walk", "tag-walk", info->revs->repo);
+		oid_array_clear(&tags);
+		oid_array_clear(&tagged_tree_list);
+		oid_array_clear(&tagged_blob_list);
+	}
+
+	/*
+	 * Before performing a DFS of our paths and emitting them as interesting,
+	 * do a full walk of the trees to distribute the UNINTERESTING bit. Use
+	 * the sparse algorithm if prune_all_uninteresting was set.
+	 */
+	if (has_uninteresting) {
+		trace2_region_enter("path-walk", "uninteresting-walk", info->revs->repo);
+		if (info->prune_all_uninteresting)
+			mark_trees_uninteresting_sparse(ctx.repo, &root_tree_set);
+		else
+			mark_trees_uninteresting_dense(ctx.repo, &root_tree_set);
+		trace2_region_leave("path-walk", "uninteresting-walk", info->revs->repo);
+	}
+	oidset_clear(&root_tree_set);
+
+	string_list_append(&ctx.path_stack, root_path);
+
+	trace2_region_enter("path-walk", "path-walk", info->revs->repo);
+	while (!ret && ctx.path_stack.nr) {
+		char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string;
+		ctx.path_stack.nr--;
+		paths_nr++;
+
+		ret = walk_path(&ctx, path);
+
+		free(path);
+	}
+	trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr);
+	trace2_region_leave("path-walk", "path-walk", info->revs->repo);
+
+	clear_strmap(&ctx.paths_to_lists);
+	string_list_clear(&ctx.path_stack, 0);
+	return ret;
+}
diff --git a/path-walk.h b/path-walk.h
new file mode 100644
index 00000000000000..7c02bca71561aa
--- /dev/null
+++ b/path-walk.h
@@ -0,0 +1,73 @@
+/*
+ * path-walk.h : Methods and structures for walking the object graph in batches
+ * by the paths that can reach those objects.
+ */
+#include "object.h" /* Required for 'enum object_type'. */
+
+struct rev_info;
+struct oid_array;
+struct pattern_list;
+
+/**
+ * The type of a function pointer for the method that is called on a list of
+ * objects reachable at a given path.
+ */
+typedef int (*path_fn)(const char *path,
+		       struct oid_array *oids,
+		       enum object_type type,
+		       void *data);
+
+struct path_walk_info {
+	/**
+	 * revs provides the definitions for the commit walk, including
+	 * which commits are UNINTERESTING or not.
+	 */
+	struct rev_info *revs;
+
+	/**
+	 * The caller wishes to execute custom logic on objects reachable at a
+	 * given path. Every reachable object will be visited exactly once, and
+	 * the first path to see an object wins. This may not be a stable choice.
+	 */
+	path_fn path_fn;
+	void *path_fn_data;
+
+	/**
+	 * Initialize which object types the path_fn should be called on. This
+	 * could also limit the walk to skip blobs if not set.
+	 */
+	int commits;
+	int trees;
+	int blobs;
+	int tags;
+
+	/**
+	 * Specify a sparse-checkout definition to match our paths to. Do not
+	 * walk outside of this sparse definition. If the patterns are in
+	 * cone mode, then the search may prune directories that are outside
+	 * of the cone. If not in cone mode, then all tree paths will be
+	 * explored but the path_fn will only be called when the path matches
+	 * the sparse-checkout patterns.
+	 */
+	struct pattern_list *pl;
+
+	/**
+	 * When 'prune_all_uninteresting' is set and a path has all objects
+	 * marked as UNINTERESTING, then the path-walk will not visit those
+	 * objects. It will not call path_fn on those objects and will not
+	 * walk the children of such trees.
+	 */
+	int prune_all_uninteresting;
+};
+
+#define PATH_WALK_INFO_INIT {   \
+	.blobs = 1,		\
+}
+
+/**
+ * Given the configuration of 'info', walk the commits based on 'info->revs' and
+ * call 'info->path_fn' on each discovered path.
+ *
+ * Returns nonzero on an error.
+ */
+int walk_objects_by_path(struct path_walk_info *info);
diff --git a/repo-settings.c b/repo-settings.c
index 2b4e68731bedce..d9597d84556161 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -45,11 +45,13 @@ void prepare_repo_settings(struct repository *r)
 		r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING;
 		r->settings.pack_use_bitmap_boundary_traversal = 1;
 		r->settings.pack_use_multi_pack_reuse = 1;
+		r->settings.pack_use_path_walk = 1;
 	}
 	if (manyfiles) {
 		r->settings.index_version = 4;
 		r->settings.index_skip_hash = 1;
 		r->settings.core_untracked_cache = UNTRACKED_CACHE_WRITE;
+		r->settings.pack_use_path_walk = 1;
 	}
 
 	/* Commit graph config or default, does not cascade (simple) */
@@ -64,6 +66,7 @@ void prepare_repo_settings(struct repository *r)
 
 	/* Boolean config or default, does not cascade (simple)  */
 	repo_cfg_bool(r, "pack.usesparse", &r->settings.pack_use_sparse, 1);
+	repo_cfg_bool(r, "pack.usepathwalk", &r->settings.pack_use_path_walk, r->settings.pack_use_path_walk);
 	repo_cfg_bool(r, "core.multipackindex", &r->settings.core_multi_pack_index, 1);
 	repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0);
 	repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash);
diff --git a/repository.h b/repository.h
index af6ea0a62cdb70..2ae9c2b1741528 100644
--- a/repository.h
+++ b/repository.h
@@ -62,6 +62,7 @@ struct repo_settings {
 	enum untracked_cache_setting core_untracked_cache;
 
 	int pack_use_sparse;
+	int pack_use_path_walk;
 	enum fetch_negotiation_setting fetch_negotiation_algorithm;
 
 	int core_multi_pack_index;
diff --git a/revision.c b/revision.c
index ac94f8d4292ff3..21c8b6d1bc0486 100644
--- a/revision.c
+++ b/revision.c
@@ -219,6 +219,21 @@ static void add_children_by_path(struct repository *r,
 	free_tree_buffer(tree);
 }
 
+void mark_trees_uninteresting_dense(struct repository *r,
+				    struct oidset *trees)
+{
+	struct object_id *oid;
+	struct oidset_iter iter;
+
+	oidset_iter_init(trees, &iter);
+	while ((oid = oidset_iter_next(&iter))) {
+		struct tree *tree = lookup_tree(r, oid);
+		/* lookup_tree() may return NULL; skip such entries. */
+		if (tree && (tree->object.flags & UNINTERESTING))
+			mark_tree_contents_uninteresting(r, tree);
+	}
+}
+
 void mark_trees_uninteresting_sparse(struct repository *r,
 				     struct oidset *trees)
 {
diff --git a/revision.h b/revision.h
index 0e470d1df19f69..6c3df8e42bfa6d 100644
--- a/revision.h
+++ b/revision.h
@@ -487,6 +487,7 @@ void put_revision_mark(const struct rev_info *revs,
 
 void mark_parents_uninteresting(struct rev_info *revs, struct commit *commit);
 void mark_tree_uninteresting(struct repository *r, struct tree *tree);
+void mark_trees_uninteresting_dense(struct repository *r, struct oidset *trees);
 void mark_trees_uninteresting_sparse(struct repository *r, struct oidset *trees);
 
 void show_object_with_name(FILE *, struct object *, const char *);
diff --git a/scalar.c b/scalar.c
index 6166a8dd4c8d1f..031d1ac179f782 100644
--- a/scalar.c
+++ b/scalar.c
@@ -170,6 +170,7 @@ static int set_recommended_config(int reconfigure)
 		{ "core.autoCRLF", "false" },
 		{ "core.safeCRLF", "false" },
 		{ "fetch.showForcedUpdates", "false" },
+		{ "push.usePathWalk", "true" },
 		{ NULL, NULL },
 	};
 	int i;
diff --git a/t/README b/t/README
index 44c02d81298dc6..a5d7d0239e074f 100644
--- a/t/README
+++ b/t/README
@@ -433,6 +433,10 @@ GIT_TEST_PACK_SPARSE=<boolean> if disabled will default the pack-objects
 builtin to use the non-sparse object walk. This can still be overridden by
 the --sparse command-line argument.
 
+GIT_TEST_PACK_PATH_WALK=<boolean> if enabled will default the pack-objects
+builtin to use the path-walk API for the object walk. This can still be
+overridden by the --no-path-walk command-line argument.
+
 GIT_TEST_PRELOAD_INDEX=<boolean> exercises the preload-index code path
 by overriding the minimum number of cache entries required per thread.
 
diff --git a/t/helper/test-name-hash.c b/t/helper/test-name-hash.c
new file mode 100644
index 00000000000000..c82ccd7cefd860
--- /dev/null
+++ b/t/helper/test-name-hash.c
@@ -0,0 +1,23 @@
+/*
+ * test-name-hash.c: Read a list of paths over stdin and report on their
+ * name-hash and full name-hash.
+ */
+
+#include "test-tool.h"
+#include "git-compat-util.h"
+#include "pack-objects.h"
+#include "strbuf.h"
+
+int cmd__name_hash(int argc, const char **argv)
+{
+	struct strbuf line = STRBUF_INIT;
+
+	/* Print "<name-hash>\t<full-name-hash>\t<path>" for each stdin line. */
+	while (!strbuf_getline(&line, stdin)) {
+		uint32_t name_hash = pack_name_hash(line.buf);
+		uint32_t full_hash = pack_full_name_hash(line.buf);
+		printf("%10"PRIu32"\t%10"PRIu32"\t%s\n", name_hash, full_hash, line.buf);
+	}
+	strbuf_release(&line); /* do not leak the line buffer */
+	return 0;
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index f8a67df7de90fc..4a6039210020e5 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -43,6 +43,7 @@ static struct test_cmd cmds[] = {
 	{ "match-trees", cmd__match_trees },
 	{ "mergesort", cmd__mergesort },
 	{ "mktemp", cmd__mktemp },
+	{ "name-hash", cmd__name_hash },
 	{ "oid-array", cmd__oid_array },
 	{ "online-cpus", cmd__online_cpus },
 	{ "pack-mtimes", cmd__pack_mtimes },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index e74bc0ffd4179c..56a83bf3aacd06 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -37,6 +37,7 @@ int cmd__lazy_init_name_hash(int argc, const char **argv);
 int cmd__match_trees(int argc, const char **argv);
 int cmd__mergesort(int argc, const char **argv);
 int cmd__mktemp(int argc, const char **argv);
+int cmd__name_hash(int argc, const char **argv);
 int cmd__online_cpus(int argc, const char **argv);
 int cmd__pack_mtimes(int argc, const char **argv);
 int cmd__parse_options(int argc, const char **argv);
diff --git a/t/perf/p5313-pack-objects.sh b/t/perf/p5313-pack-objects.sh
new file mode 100755
index 00000000000000..b3b7fff8abf3bf
--- /dev/null
+++ b/t/perf/p5313-pack-objects.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+test_description='Tests pack performance with --full-name-hash and --path-walk'
+. ./perf-lib.sh
+
+GIT_TEST_PASSING_SANITIZE_LEAK=0
+export GIT_TEST_PASSING_SANITIZE_LEAK
+
+test_perf_large_repo
+
+test_expect_success 'create rev input' '
+	cat >in-thin <<-EOF &&
+	$(git rev-parse HEAD)
+	^$(git rev-parse HEAD~1)
+	EOF
+	
+	cat >in-big-recent <<-EOF
+	$(git rev-parse HEAD)
+	^$(git rev-parse HEAD~1000)
+	EOF
+'
+
+test_perf 'thin pack' '
+	git pack-objects --thin --stdout --revs --sparse  <in-thin >out
+'
+
+test_size 'thin pack size' '
+	wc -c <out
+'
+
+test_perf 'thin pack with --full-name-hash' '
+	git pack-objects --thin --stdout --revs --sparse --full-name-hash <in-thin >out
+'
+
+test_size 'thin pack size with --full-name-hash' '
+	wc -c <out
+'
+
+test_perf 'thin pack with --path-walk' '
+	git pack-objects --thin --stdout --revs --sparse --path-walk <in-thin >out
+'
+
+test_size 'thin pack size with --path-walk' '
+	wc -c <out
+'
+
+test_perf 'big recent pack' '
+	git pack-objects --stdout --revs <in-big-recent >out
+'
+
+test_size 'big recent pack size' '
+	wc -c <out
+'
+
+test_perf 'big recent pack with --full-name-hash' '
+	git pack-objects --stdout --revs --full-name-hash <in-big-recent >out
+'
+
+test_size 'big recent pack size with --full-name-hash' '
+	wc -c <out
+'
+
+test_perf 'big recent pack with --path-walk' '
+	git pack-objects --stdout --revs --path-walk <in-big-recent >out
+'
+
+test_size 'big recent pack size with --path-walk' '
+	wc -c <out
+'
+
+test_perf 'full repack' '
+	git repack -adf --no-write-bitmap-index
+'
+
+test_size 'full repack size' '
+	du -a .git/objects/pack | \
+	   awk "{ print \$1; }" | \
+		       sort -nr | head -n 1
+'
+
+test_perf 'full repack with --full-name-hash' '
+	git repack -adf --no-write-bitmap-index --full-name-hash
+'
+
+test_size 'full repack size with --full-name-hash' '
+	du -a .git/objects/pack | \
+	   awk "{ print \$1; }" | \
+		       sort -nr | head -n 1
+'
+
+test_perf 'full repack with --path-walk' '
+	git repack -adf --no-write-bitmap-index --path-walk
+'
+
+test_size 'full repack size with --path-walk' '
+	du -a .git/objects/pack | \
+	   awk "{ print \$1; }" | \
+		       sort -nr | head -n 1
+'
+
+test_done
diff --git a/t/perf/p5314-name-hash.sh b/t/perf/p5314-name-hash.sh
new file mode 100755
index 00000000000000..9fe26612facc1d
--- /dev/null
+++ b/t/perf/p5314-name-hash.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+test_description='Tests distribution of name-hash and full-name-hash values'
+. ./perf-lib.sh
+
+GIT_TEST_PASSING_SANITIZE_LEAK=0
+export GIT_TEST_PASSING_SANITIZE_LEAK
+
+test_perf_large_repo
+
+test_size 'paths at head' '
+	git ls-tree -r --name-only HEAD >path-list &&
+	wc -l <path-list
+'
+
+test_size 'number of distinct name-hashes' '
+	cat path-list | test-tool name-hash >name-hashes &&
+	cat name-hashes | awk "{ print \$1; }" | sort -n | uniq -c >name-hash-count &&
+	wc -l <name-hash-count
+'
+
+test_size 'number of distinct full-name-hashes' '
+	cat name-hashes | awk "{ print \$2; }" | sort -n | uniq -c >full-name-hash-count &&
+	wc -l <full-name-hash-count
+'
+
+test_size 'maximum multiplicity of name-hashes' '
+	cat name-hash-count | \
+		sort -nr | \
+		head -n 1 | \
+		awk "{ print \$1; }"
+'
+
+test_size 'maximum multiplicity of fullname-hashes' '
+	cat full-name-hash-count | \
+		sort -nr | \
+		head -n 1 | \
+		awk "{ print \$1; }"
+'
+
+test_done
diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh
new file mode 100755
index 00000000000000..1fa2e90f8cf57f
--- /dev/null
+++ b/t/t5620-backfill.sh
@@ -0,0 +1,181 @@
+#!/bin/sh
+
+test_description='git backfill on partial clones'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+TEST_PASSES_SANITIZE_LEAK=0
+export TEST_PASSES_SANITIZE_LEAK
+
+. ./test-lib.sh
+
+# We create objects in the 'src' repo.
+test_expect_success 'setup repo for object creation' '
+	echo "{print \$1}" >print_1.awk &&
+	echo "{print \$2}" >print_2.awk &&
+
+	git init src &&
+
+	mkdir -p src/a/b/c &&
+	mkdir -p src/d/e &&
+
+	for i in 1 2
+	do
+		for n in 1 2 3 4
+		do
+			echo "Version $i of file $n" > src/file.$n.txt &&
+			echo "Version $i of file a/$n" > src/a/file.$n.txt &&
+			echo "Version $i of file a/b/$n" > src/a/b/file.$n.txt &&
+			echo "Version $i of file a/b/c/$n" > src/a/b/c/file.$n.txt &&
+			echo "Version $i of file d/$n" > src/d/file.$n.txt &&
+			echo "Version $i of file d/e/$n" > src/d/e/file.$n.txt &&
+			git -C src add . &&
+			git -C src commit -m "Iteration $n" || return 1
+		done
+	done
+'
+
+# Clone 'src' into 'srv.bare' so we have a bare repo to be our origin
+# server for the partial clone.
+test_expect_success 'setup bare clone for server' '
+	git clone --bare "file://$(pwd)/src" srv.bare &&
+	git -C srv.bare config --local uploadpack.allowfilter 1 &&
+	git -C srv.bare config --local uploadpack.allowanysha1inwant 1
+'
+
+# do basic partial clone from "srv.bare"
+test_expect_success 'do partial clone 1, backfill gets all objects' '
+	git clone --no-checkout --filter=blob:none	\
+		--single-branch --branch=main 		\
+		"file://$(pwd)/srv.bare" backfill1 &&
+
+	# Backfill with no options gets everything reachable from HEAD.
+	GIT_TRACE2_EVENT="$(pwd)/backfill-file-trace" git \
+		-C backfill1 backfill &&
+
+	# We should have engaged the partial clone machinery
+	test_trace2_data promisor fetch_count 48 <backfill-file-trace &&
+
+	# No more missing objects!
+	git -C backfill1 rev-list --quiet --objects --missing=print HEAD >revs2 &&
+	test_line_count = 0 revs2
+'
+
+test_expect_success 'do partial clone 2, backfill batch size' '
+	git clone --no-checkout --filter=blob:none	\
+		--single-branch --branch=main 		\
+		"file://$(pwd)/srv.bare" backfill2 &&
+
+	GIT_TRACE2_EVENT="$(pwd)/batch-trace" git \
+		-C backfill2 backfill --batch-size=20 &&
+
+	# Batches were used
+	test_trace2_data promisor fetch_count 20 <batch-trace >matches &&
+	test_line_count = 2 matches &&
+	test_trace2_data promisor fetch_count 8 <batch-trace &&
+
+	# No more missing objects!
+	git -C backfill2 rev-list --quiet --objects --missing=print HEAD >revs2 &&
+	test_line_count = 0 revs2
+'
+
+test_expect_success 'backfill --sparse without sparse-checkout fails' '
+	git init not-sparse &&
+	test_must_fail git -C not-sparse backfill --sparse 2>err &&
+	grep "problem loading sparse-checkout" err
+'
+
+test_expect_success 'backfill --sparse' '
+	git clone --sparse --filter=blob:none		\
+		--single-branch --branch=main 		\
+		"file://$(pwd)/srv.bare" backfill3 &&
+
+	# Initial checkout includes four files at root.
+	git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing &&
+	test_line_count = 44 missing &&
+
+	# Initial sparse-checkout is just the files at root, so we get the
+	# older versions of the four files at tip.
+	GIT_TRACE2_EVENT="$(pwd)/sparse-trace1" git \
+		-C backfill3 backfill --sparse &&
+	test_trace2_data promisor fetch_count 4 <sparse-trace1 &&
+	test_trace2_data path-walk paths 5 <sparse-trace1 &&
+	git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing &&
+	test_line_count = 40 missing &&
+
+	# Expand the sparse-checkout to include 'd' recursively. This
+	# engages the algorithm to skip the trees for 'a'. Note that
+	# the "sparse-checkout set" command downloads the objects at tip
+	# to satisfy the current checkout.
+	git -C backfill3 sparse-checkout set d &&
+	GIT_TRACE2_EVENT="$(pwd)/sparse-trace2" git \
+		-C backfill3 backfill --sparse &&
+	test_trace2_data promisor fetch_count 8 <sparse-trace2 &&
+	test_trace2_data path-walk paths 15 <sparse-trace2 &&
+	git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing &&
+	test_line_count = 24 missing &&
+
+	# Disabling the --sparse option (on by default) will download everything
+	git -C backfill3 backfill --no-sparse &&
+	git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing &&
+	test_line_count = 0 missing
+'
+
+test_expect_success 'backfill --sparse without cone mode' '
+	git clone --no-checkout --filter=blob:none		\
+		--single-branch --branch=main 		\
+		"file://$(pwd)/srv.bare" backfill4 &&
+
+	# No blobs yet
+	git -C backfill4 rev-list --quiet --objects --missing=print HEAD >missing &&
+	test_line_count = 48 missing &&
+
+	# Define sparse-checkout by filename regardless of parent directory.
+	# This downloads 6 blobs to satisfy the checkout.
+	git -C backfill4 sparse-checkout set --no-cone "**/file.1.txt" &&
+	git -C backfill4 checkout main &&
+
+	GIT_TRACE2_EVENT="$(pwd)/no-cone-trace1" git \
+		-C backfill4 backfill --sparse &&
+	test_trace2_data promisor fetch_count 6 <no-cone-trace1 &&
+
+	# This walk needed to visit all directories to search for these paths.
+	test_trace2_data path-walk paths 12 <no-cone-trace1 &&
+	git -C backfill4 rev-list --quiet --objects --missing=print HEAD >missing &&
+	test_line_count = 36 missing
+'
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success 'create a partial clone over HTTP' '
+	SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
+	rm -rf "$SERVER" repo &&
+	git clone --bare "file://$(pwd)/src" "$SERVER" &&
+	test_config -C "$SERVER" uploadpack.allowfilter 1 &&
+	test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&
+
+	git clone --no-checkout --filter=blob:none \
+		"$HTTPD_URL/smart/server" backfill-http
+'
+
+test_expect_success 'backfilling over HTTP succeeds' '
+	GIT_TRACE2_EVENT="$(pwd)/backfill-http-trace" git \
+		-C backfill-http backfill &&
+
+	# We should have engaged the partial clone machinery
+	test_trace2_data promisor fetch_count 48 <backfill-http-trace &&
+
+	# Confirm all objects are present, none missing.
+	git -C backfill-http rev-list --objects --all >rev-list-out &&
+	awk "{print \$1;}" <rev-list-out >oids &&
+	GIT_TRACE2_EVENT="$(pwd)/walk-trace" git -C backfill-http \
+		cat-file --batch-check <oids >batch-out &&
+	! grep missing batch-out
+'
+
+# DO NOT add non-httpd-specific tests here, because the last part of this
+# test script is only executed when httpd is available and enabled.
+
+test_done
diff --git a/t/t8100-git-survey.sh b/t/t8100-git-survey.sh
new file mode 100755
index 00000000000000..c2dab0033f9379
--- /dev/null
+++ b/t/t8100-git-survey.sh
@@ -0,0 +1,76 @@
+#!/bin/sh
+
+test_description='git survey'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+TEST_PASSES_SANITIZE_LEAK=0
+export TEST_PASSES_SANITIZE_LEAK
+
+. ./test-lib.sh
+
+test_expect_success 'git survey -h shows experimental warning' '
+	test_expect_code 129 git survey -h 2>usage &&
+	grep "EXPERIMENTAL!" usage
+'
+
+test_expect_success 'create a semi-interesting repo' '
+	test_commit_bulk 10 &&
+	git tag -a -m one one HEAD~5 &&
+	git tag -a -m two two HEAD~3 &&
+	git tag -a -m three three two &&
+	git tag -a -m four four three &&
+	git update-ref -d refs/tags/three &&
+	git update-ref -d refs/tags/two
+'
+
+test_expect_success 'git survey (default)' '
+	git survey --all-refs >out 2>err &&
+	test_line_count = 0 err &&
+
+	cat >expect <<-EOF &&
+	GIT SURVEY for "$(pwd)"
+	-----------------------------------------------------
+
+	REFERENCES SUMMARY
+	========================
+	        Ref Type | Count
+	-----------------+------
+	        Branches |     1
+	     Remote refs |     0
+	      Tags (all) |     2
+	Tags (annotated) |     2
+
+	REACHABLE OBJECT SUMMARY
+	========================
+	Object Type | Count
+	------------+------
+	       Tags |     0
+	    Commits |    10
+	      Trees |    10
+	      Blobs |    10
+
+	TOTAL OBJECT SIZES BY TYPE
+	===============================================
+	Object Type | Count | Disk Size | Inflated Size
+	------------+-------+-----------+--------------
+	    Commits |    10 |      1523 |          2153
+	      Trees |    10 |       495 |          1706
+	      Blobs |    10 |       191 |           101
+	EOF
+
+	lines=$(wc -l <expect) &&
+	head -n $lines out >out-trimmed &&
+	test_cmp expect out-trimmed &&
+
+	for type in "DIRECTORIES" "FILES"
+	do
+		for metric in "COUNT" "DISK SIZE" "INFLATED SIZE"
+		do
+			grep "TOP $type BY $metric" out || return 1
+		done || return 1
+	done
+'
+
+test_done