Skip to content

Commit e91880a

Browse files
derrickstolee authored and dscho committed
survey: summarize total sizes by object type
Now that we have explored objects by count, we can expand that a bit more to summarize the data for the on-disk and inflated size of those objects. This information is helpful for diagnosing both why disk space (and perhaps clone or fetch times) is growing and why certain operations are slow because the inflated size of the abstract objects that must be processed is so large. Signed-off-by: Derrick Stolee <[email protected]>
1 parent ae8bf20 commit e91880a

File tree

2 files changed

+161
-0
lines changed

2 files changed

+161
-0
lines changed

builtin/survey.c

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,36 @@ struct survey_report_object_summary {
6060
size_t blobs_nr;
6161
};
6262

63+
/**
 * Size statistics accumulated for one category of objects.
 *
 * The category is identified by 'label'. For the objects that match it
 * we track how many there are, their total on-disk size, and their total
 * size after decompressing (both with delta bases and zlib). Objects
 * whose information could not be looked up are counted separately.
 */
struct survey_report_object_size_summary {
	char *label;		/* name of the category shown in the report */
	size_t nr;		/* objects successfully looked up */
	size_t disk_size;	/* sum of on-disk sizes */
	size_t inflated_size;	/* sum of decompressed sizes */
	size_t num_missing;	/* objects whose lookup failed */
};
75+
6376
/**
6477
* This struct contains all of the information that needs to be printed
6578
* at the end of the exploration of the repository and its references.
6679
*/
6780
struct survey_report {
6881
struct survey_report_ref_summary refs;
6982
struct survey_report_object_summary reachable_objects;
83+
84+
struct survey_report_object_size_summary *by_type;
7085
};
7186

87+
/*
 * Slots of the 'by_type' array in struct survey_report, one per object
 * type. REPORT_TYPE_COUNT is the number of slots and must stay last.
 */
#define REPORT_TYPE_COMMIT	0
#define REPORT_TYPE_TREE	1
#define REPORT_TYPE_BLOB	2
#define REPORT_TYPE_TAG		3
#define REPORT_TYPE_COUNT	4
92+
7293
struct survey_context {
7394
struct repository *repo;
7495

@@ -280,12 +301,48 @@ static void survey_report_plaintext_reachable_object_summary(struct survey_conte
280301
clear_table(&table);
281302
}
282303

304+
static void survey_report_object_sizes(const char *title,
305+
const char *categories,
306+
struct survey_report_object_size_summary *summary,
307+
size_t summary_nr)
308+
{
309+
struct survey_table table = SURVEY_TABLE_INIT;
310+
table.table_name = title;
311+
312+
strvec_push(&table.header, categories);
313+
strvec_push(&table.header, _("Count"));
314+
strvec_push(&table.header, _("Disk Size"));
315+
strvec_push(&table.header, _("Inflated Size"));
316+
317+
for (size_t i = 0; i < summary_nr; i++) {
318+
char *label_str = xstrdup(summary[i].label);
319+
char *nr_str = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].nr);
320+
char *disk_str = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].disk_size);
321+
char *inflate_str = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].inflated_size);
322+
323+
insert_table_rowv(&table, label_str, nr_str,
324+
disk_str, inflate_str, NULL);
325+
326+
free(label_str);
327+
free(nr_str);
328+
free(disk_str);
329+
free(inflate_str);
330+
}
331+
332+
print_table_plaintext(&table);
333+
clear_table(&table);
334+
}
335+
283336
static void survey_report_plaintext(struct survey_context *ctx)
284337
{
285338
printf("GIT SURVEY for \"%s\"\n", ctx->repo->worktree);
286339
printf("-----------------------------------------------------\n");
287340
survey_report_plaintext_refs(ctx);
288341
survey_report_plaintext_reachable_object_summary(ctx);
342+
survey_report_object_sizes(_("TOTAL OBJECT SIZES BY TYPE"),
343+
_("Object Type"),
344+
ctx->report.by_type,
345+
REPORT_TYPE_COUNT);
289346
}
290347

291348
/*
@@ -498,6 +555,68 @@ static void increment_object_counts(
498555
}
499556
}
500557

558+
static void increment_totals(struct survey_context *ctx,
559+
struct oid_array *oids,
560+
struct survey_report_object_size_summary *summary)
561+
{
562+
for (size_t i = 0; i < oids->nr; i++) {
563+
struct object_info oi = OBJECT_INFO_INIT;
564+
unsigned oi_flags = OBJECT_INFO_FOR_PREFETCH;
565+
unsigned long object_length = 0;
566+
off_t disk_sizep = 0;
567+
enum object_type type;
568+
569+
oi.typep = &type;
570+
oi.sizep = &object_length;
571+
oi.disk_sizep = &disk_sizep;
572+
573+
if (oid_object_info_extended(ctx->repo, &oids->oid[i],
574+
&oi, oi_flags) < 0) {
575+
summary->num_missing++;
576+
} else {
577+
summary->nr++;
578+
summary->disk_size += disk_sizep;
579+
summary->inflated_size += object_length;
580+
}
581+
}
582+
}
583+
584+
static void increment_object_totals(struct survey_context *ctx,
585+
struct oid_array *oids,
586+
enum object_type type)
587+
{
588+
struct survey_report_object_size_summary *total;
589+
struct survey_report_object_size_summary summary = { 0 };
590+
591+
increment_totals(ctx, oids, &summary);
592+
593+
switch (type) {
594+
case OBJ_COMMIT:
595+
total = &ctx->report.by_type[REPORT_TYPE_COMMIT];
596+
break;
597+
598+
case OBJ_TREE:
599+
total = &ctx->report.by_type[REPORT_TYPE_TREE];
600+
break;
601+
602+
case OBJ_BLOB:
603+
total = &ctx->report.by_type[REPORT_TYPE_BLOB];
604+
break;
605+
606+
case OBJ_TAG:
607+
total = &ctx->report.by_type[REPORT_TYPE_TAG];
608+
break;
609+
610+
default:
611+
BUG("No other type allowed");
612+
}
613+
614+
total->nr += summary.nr;
615+
total->disk_size += summary.disk_size;
616+
total->inflated_size += summary.inflated_size;
617+
total->num_missing += summary.num_missing;
618+
}
619+
501620
static int survey_objects_path_walk_fn(const char *path,
502621
struct oid_array *oids,
503622
enum object_type type,
@@ -507,10 +626,20 @@ static int survey_objects_path_walk_fn(const char *path,
507626

508627
increment_object_counts(&ctx->report.reachable_objects,
509628
type, oids->nr);
629+
increment_object_totals(ctx, oids, type);
510630

511631
return 0;
512632
}
513633

634+
static void initialize_report(struct survey_context *ctx)
635+
{
636+
CALLOC_ARRAY(ctx->report.by_type, REPORT_TYPE_COUNT);
637+
ctx->report.by_type[REPORT_TYPE_COMMIT].label = xstrdup(_("Commits"));
638+
ctx->report.by_type[REPORT_TYPE_TREE].label = xstrdup(_("Trees"));
639+
ctx->report.by_type[REPORT_TYPE_BLOB].label = xstrdup(_("Blobs"));
640+
ctx->report.by_type[REPORT_TYPE_TAG].label = xstrdup(_("Tags"));
641+
}
642+
514643
static void survey_phase_objects(struct survey_context *ctx)
515644
{
516645
struct rev_info revs = REV_INFO_INIT;
@@ -523,12 +652,15 @@ static void survey_phase_objects(struct survey_context *ctx)
523652
info.path_fn = survey_objects_path_walk_fn;
524653
info.path_fn_data = ctx;
525654

655+
initialize_report(ctx);
656+
526657
repo_init_revisions(ctx->repo, &revs, "");
527658
revs.tag_objects = 1;
528659

529660
for (int i = 0; i < ctx->ref_array.nr; i++) {
530661
struct ref_array_item *item = ctx->ref_array.items[i];
531662
add_pending_oid(&revs, NULL, &item->objectname, add_flags);
663+
display_progress(ctx->progress, ++(ctx->progress_nr));
532664
}
533665

534666
walk_objects_by_path(&info);

t/t8100-git-survey.sh

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,26 @@ test_expect_success 'git survey (default)' '
2929
git survey --all-refs >out 2>err &&
3030
test_line_count = 0 err &&
3131
32+
test_oid_cache <<-EOF &&
33+
commits_size_on_disk sha1: 1523
34+
commits_size_on_disk sha256: 1811
35+
36+
commits_size sha1: 2153
37+
commits_size sha256: 2609
38+
39+
trees_size_on_disk sha1: 495
40+
trees_size_on_disk sha256: 635
41+
42+
trees_size sha1: 1706
43+
trees_size sha256: 2366
44+
45+
tags_size sha1: 528
46+
tags_size sha256: 624
47+
48+
tags_size_on_disk sha1: 510
49+
tags_size_on_disk sha256: 569
50+
EOF
51+
3252
tr , " " >expect <<-EOF &&
3353
GIT SURVEY for "$(pwd)"
3454
-----------------------------------------------------
@@ -50,6 +70,15 @@ test_expect_success 'git survey (default)' '
5070
Commits | 10
5171
Trees | 10
5272
Blobs | 10
73+
74+
TOTAL OBJECT SIZES BY TYPE
75+
===============================================
76+
Object Type | Count | Disk Size | Inflated Size
77+
------------+-------+-----------+--------------
78+
Commits | 10 | $(test_oid commits_size_on_disk) | $(test_oid commits_size)
79+
Trees | 10 | $(test_oid trees_size_on_disk) | $(test_oid trees_size)
80+
Blobs | 10 | 191 | 101
81+
Tags | 4 | $(test_oid tags_size_on_disk) | $(test_oid tags_size)
5382
EOF
5483
5584
test_cmp expect out

0 commit comments

Comments
 (0)