diff --git a/git-sizer.go b/git-sizer.go index d1e075c..0888d78 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -1,6 +1,7 @@ package main import ( + "context" "encoding/json" "errors" "fmt" @@ -19,7 +20,9 @@ import ( "github.com/github/git-sizer/sizes" ) -const usage = `usage: git-sizer [OPTS] +const usage = `usage: git-sizer [OPTS] [ROOT...] + + Scan objects in your Git repository and emit statistics about them. --threshold THRESHOLD minimum level of concern (i.e., number of stars) that should be reported. Default: @@ -45,12 +48,29 @@ const usage = `usage: git-sizer [OPTS] be set via gitconfig: 'sizer.progress'. --version only report the git-sizer version number + Object selection: + + git-sizer traverses through your Git history to find objects to + process. By default, it processes all objects that are reachable from + any reference. You can tell it to process only some of your + references; see "Reference selection" below. + + If explicit ROOTs are specified on the command line, each one should + be a string that 'git rev-parse' can convert into a single Git object + ID, like 'main', 'main~:src', or an abbreviated SHA-1. See + git-rev-parse(1) for details. In that case, git-sizer also treats + those objects as starting points for its traversal, and also includes + the Git objects that are reachable from those roots in the analysis. + + As a special case, if one or more ROOTs are specified on the command + line but _no_ reference selection options, then _only_ the specified + ROOTs are traversed, and no references. + Reference selection: - By default, git-sizer processes all Git objects that are reachable - from any reference. The following options can be used to limit which - references to process. The last rule matching a reference determines - whether that reference is processed. + The following options can be used to limit which references to + process. The last rule matching a reference determines whether that + reference is processed. 
--[no-]branches process [don't process] branches --[no-]tags process [don't process] tags @@ -93,14 +113,16 @@ var ReleaseVersion string var BuildVersion string func main() { - err := mainImplementation(os.Stdout, os.Stderr, os.Args[1:]) + ctx := context.Background() + + err := mainImplementation(ctx, os.Stdout, os.Stderr, os.Args[1:]) if err != nil { fmt.Fprintf(os.Stderr, "error: %s\n", err) os.Exit(1) } } -func mainImplementation(stdout, stderr io.Writer, args []string) error { +func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []string) error { var nameStyle sizes.NameStyle = sizes.NameStyleFull var cpuprofile string var jsonOutput bool @@ -216,10 +238,6 @@ func mainImplementation(stdout, stderr io.Writer, args []string) error { return nil } - if len(flags.Args()) != 0 { - return errors.New("excess arguments") - } - if repoErr != nil { return fmt.Errorf("couldn't open Git repository: %w", repoErr) } @@ -273,7 +291,7 @@ func mainImplementation(stdout, stderr io.Writer, args []string) error { progress = v } - rg, err := rgb.Finish() + rg, err := rgb.Finish(len(flags.Args()) == 0) if err != nil { return err } @@ -288,7 +306,27 @@ func mainImplementation(stdout, stderr io.Writer, args []string) error { progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) } - historySize, err := sizes.ScanRepositoryUsingGraph(repo, rg, nameStyle, progressMeter) + refRoots, err := sizes.CollectReferences(ctx, repo, rg) + if err != nil { + return fmt.Errorf("determining which reference to scan: %w", err) + } + + roots := make([]sizes.Root, 0, len(refRoots)+len(flags.Args())) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + for _, arg := range flags.Args() { + oid, err := repo.ResolveObject(arg) + if err != nil { + return fmt.Errorf("resolving command-line argument %q: %w", arg, err) + } + roots = append(roots, sizes.NewExplicitRoot(arg, oid)) + } + + historySize, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, 
roots, nameStyle, progressMeter, + ) if err != nil { return fmt.Errorf("error scanning repository: %w", err) } diff --git a/git/obj_resolver.go b/git/obj_resolver.go new file mode 100644 index 0000000..418e293 --- /dev/null +++ b/git/obj_resolver.go @@ -0,0 +1,20 @@ +package git + +import ( + "bytes" + "fmt" +) + +func (repo *Repository) ResolveObject(name string) (OID, error) { + cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name) + output, err := cmd.Output() + if err != nil { + return NullOID, fmt.Errorf("resolving object %q: %w", name, err) + } + oidString := string(bytes.TrimSpace(output)) + oid, err := NewOID(oidString) + if err != nil { + return NullOID, fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) + } + return oid, nil +} diff --git a/git/ref_filter.go b/git/ref_filter.go index 8eb8a9b..46aff66 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -83,15 +83,23 @@ func (_ allReferencesFilter) Filter(_ string) bool { var AllReferencesFilter allReferencesFilter +type noReferencesFilter struct{} + +func (_ noReferencesFilter) Filter(_ string) bool { + return false +} + +var NoReferencesFilter noReferencesFilter + // PrefixFilter returns a `ReferenceFilter` that matches references // whose names start with the specified `prefix`, which must match at // a component boundary. For example, // -// * Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not -// "refs/foobar". +// - Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not +// "refs/foobar". // -// * Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or -// "refs/foobar". +// - Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or +// "refs/foobar". 
func PrefixFilter(prefix string) ReferenceFilter { if prefix == "" { return AllReferencesFilter diff --git a/git_sizer_test.go b/git_sizer_test.go index 6ab132f..16d58c9 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -2,6 +2,7 @@ package main_test import ( "bytes" + "context" "encoding/json" "fmt" "io" @@ -557,87 +558,164 @@ func (rg refGrouper) Groups() []sizes.RefGroup { func TestBomb(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, true, "bomb") - t.Cleanup(func() { repo.Remove(t) }) + ctx := context.Background() - newGitBomb(t, repo, 10, 10, "boom!\n") + testRepo := testutils.NewTestRepo(t, true, "bomb") + t.Cleanup(func() { testRepo.Remove(t) }) - h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), - refGrouper{}, sizes.NameStyleFull, meter.NoProgressMeter, - ) - require.NoError(t, err) + newGitBomb(t, testRepo, 10, 10, "boom!\n") + + repo := testRepo.Repository(t) - assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") - assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") - assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") - assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.Path(), "max commit size commit") - assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") - assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") - assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.Path(), "max parent count commit") - - assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") - assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") - assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") - assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") - assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.Path(), "max tree entries tree") - - assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, 
"unique blob count") - assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") - assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") - assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.Path(), "max blob size blob") - - assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") - assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") - - assert.Equal(t, counts.Count32(1), h.ReferenceCount, "reference count") - - assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.Path(), "max path depth tree") - assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.Path(), "max path length tree") - - assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.Path(), "max expanded tree count tree") - assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.Path(), "max expanded blob count tree") - assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.Path(), "max expanded blob size tree") - assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") - assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") - assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") - assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + t.Run("full", func(t *testing.T) { + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + + roots := make([]sizes.Root, 
0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") + assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.BestPath(), "max commit size commit") + assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.BestPath(), "max parent count commit") + + assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(1), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.BestPath(), "max path depth 
tree") + assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) + + t.Run("partial", func(t *testing.T) { + name := "master:d0/d0" + oid, err := repo.ResolveObject(name) + require.NoError(t, err) + roots := []sizes.Root{sizes.NewExplicitRoot(name, oid)} + + h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(0), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(0), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(0), h.MaxCommitSize, "max commit size") + assert.Nil(t, h.MaxCommitSizeCommit) + assert.Equal(t, counts.Count32(0), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Nil(t, h.MaxParentCountCommit, "max parent 
count commit") + + assert.Equal(t, counts.Count32(8), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2330), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(80), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(0), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(8), h.MaxPathDepth, "max path depth") + assert.Equal(t, "master:d0/d0", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(23), h.MaxPathLength, "max path length") + assert.Equal(t, "master:d0/d0", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 8)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(pow(10, 8)), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 8)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), 
h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) } func TestTaggedTags(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, false, "tagged-tags") - defer repo.Remove(t) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, false, "tagged-tags") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - cmd := repo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + cmd := testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") // The lexicographical order of these tags is important, hence // their strange names. - cmd = repo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 1") - cmd = repo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 2") - cmd = repo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 3") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), repo, + roots, 
sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -646,20 +724,32 @@ func TestTaggedTags(t *testing.T) { func TestFromSubdir(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, false, "subdir") - defer repo.Remove(t) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, false, "subdir") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - repo.AddFile(t, "subdir/file.txt", "Hello, world!\n") + testRepo.AddFile(t, "subdir/file.txt", "Hello, world!\n") - cmd := repo.GitCommand(t, "commit", "-m", "initial") + cmd := testRepo.GitCommand(t, "commit", "-m", "initial") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), testRepo.Repository(t), + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -668,6 +758,8 @@ func TestFromSubdir(t *testing.T) { func TestSubmodule(t *testing.T) { t.Parallel() + ctx := context.Background() + tmp, err := ioutil.TempDir("", "submodule") require.NoError(t, err, "creating temporary directory") @@ -677,42 +769,52 @@ func TestSubmodule(t *testing.T) { timestamp := time.Unix(1112911993, 0) - submRepo := testutils.TestRepo{ + submTestRepo := testutils.TestRepo{ Path: filepath.Join(tmp, "subm"), } - submRepo.Init(t, false) - submRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") - submRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") 
- submRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") + submTestRepo.Init(t, false) + submTestRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") + submTestRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") + submTestRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") - cmd := submRepo.GitCommand(t, "commit", "-m", "subm initial") + cmd := submTestRepo.GitCommand(t, "commit", "-m", "subm initial") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating subm commit") - mainRepo := testutils.TestRepo{ + mainTestRepo := testutils.TestRepo{ Path: filepath.Join(tmp, "main"), } - mainRepo.Init(t, false) + mainTestRepo.Init(t, false) - mainRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") + mainTestRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") - cmd = mainRepo.GitCommand(t, "commit", "-m", "main initial") + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "main initial") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = mainRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submRepo.Path, "sub") - cmd.Dir = mainRepo.Path + cmd = mainTestRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submTestRepo.Path, "sub") + cmd.Dir = mainTestRepo.Path require.NoError(t, cmd.Run(), "adding submodule") - cmd = mainRepo.GitCommand(t, "commit", "-m", "add submodule") + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "add submodule") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "committing submodule to main") + mainRepo := mainTestRepo.Repository(t) + + mainRefRoots, err := sizes.CollectReferences(ctx, mainRepo, refGrouper{}) + require.NoError(t, err) + + mainRoots := make([]sizes.Root, 0, len(mainRefRoots)) + for _, refRoot := range mainRefRoots { + mainRoots = append(mainRoots, refRoot) + } + // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - 
mainRepo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), mainTestRepo.Repository(t), + mainRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -720,12 +822,23 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(1), h.MaxExpandedSubmoduleCount, "max expanded submodule count") // Analyze the submodule: - submRepo2 := testutils.TestRepo{ - Path: filepath.Join(mainRepo.Path, "sub"), + submTestRepo2 := testutils.TestRepo{ + Path: filepath.Join(mainTestRepo.Path, "sub"), } + + submRepo2 := submTestRepo2.Repository(t) + + submRefRoots2, err := sizes.CollectReferences(ctx, submRepo2, refGrouper{}) + require.NoError(t, err) + + submRoots2 := make([]sizes.Root, 0, len(submRefRoots2)) + for _, refRoot := range submRefRoots2 { + submRoots2 = append(submRoots2, refRoot) + } + h, err = sizes.ScanRepositoryUsingGraph( - submRepo2.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), submRepo2, + submRoots2, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 3c3179e..48f1190 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -254,9 +254,14 @@ func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { // Finish collects the information gained from processing the options // and returns a `sizes.RefGrouper`. -func (rgb *RefGroupBuilder) Finish() (sizes.RefGrouper, error) { +func (rgb *RefGroupBuilder) Finish(defaultAll bool) (sizes.RefGrouper, error) { if rgb.topLevelGroup.filter == nil { - rgb.topLevelGroup.filter = git.AllReferencesFilter + // User didn't specify any reference options. 
+ if defaultAll { + rgb.topLevelGroup.filter = git.AllReferencesFilter + } else { + rgb.topLevelGroup.filter = git.NoReferencesFilter + } } refGrouper := refGrouper{ diff --git a/sizes/explicit_root.go b/sizes/explicit_root.go new file mode 100644 index 0000000..09348db --- /dev/null +++ b/sizes/explicit_root.go @@ -0,0 +1,19 @@ +package sizes + +import "github.com/github/git-sizer/git" + +type ExplicitRoot struct { + name string + oid git.OID +} + +func NewExplicitRoot(name string, oid git.OID) ExplicitRoot { + return ExplicitRoot{ + name: name, + oid: oid, + } +} + +func (er ExplicitRoot) Name() string { return er.name } +func (er ExplicitRoot) OID() git.OID { return er.oid } +func (er ExplicitRoot) Walk() bool { return true } diff --git a/sizes/graph.go b/sizes/graph.go index 7e923f6..0fb1c8a 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -11,48 +11,16 @@ import ( "github.com/github/git-sizer/meter" ) -// RefGroupSymbol is the string "identifier" that is used to refer to -// a refgroup, for example in the gitconfig. Nesting of refgroups is -// inferred from their names, using "." as separator between -// components. For example, if there are three refgroups with symbols -// "tags", "tags.releases", and "foo.bar", then "tags.releases" is -// considered to be nested within "tags", and "foo.bar" is considered -// to be nested within "foo", the latter being created automatically -// if it was not configured explicitly. -type RefGroupSymbol string - -// RefGroup is a group of references, for example "branches" or -// "tags". Reference groups might overlap. -type RefGroup struct { - // Symbol is the unique string by which this `RefGroup` is - // identified and configured. It consists of dot-separated - // components, which implicitly makes a nested tree-like - // structure. - Symbol RefGroupSymbol - - // Name is the name for this `ReferenceGroup` to be presented - // in user-readable output. 
- Name string +type Root interface { + Name() string + OID() git.OID + Walk() bool } -// RefGrouper describes a type that can collate reference names into -// groups and decide which ones to walk. -type RefGrouper interface { - // Categorize tells whether `refname` should be walked at all, - // and if so, the symbols of the reference groups to which it - // belongs. - Categorize(refname string) (bool, []RefGroupSymbol) - - // Groups returns the list of `ReferenceGroup`s, in the order - // that they should be presented. The return value might - // depend on which references have been seen so far. - Groups() []RefGroup -} - -type refSeen struct { - git.Reference - walked bool - groups []RefGroupSymbol +type ReferenceRoot interface { + Root + Reference() git.Reference + Groups() []RefGroupSymbol } // ScanRepositoryUsingGraph scans `repo`, using `rg` to decide which @@ -63,60 +31,36 @@ type refSeen struct { // // It returns the size data for the repository. func ScanRepositoryUsingGraph( - repo *git.Repository, rg RefGrouper, nameStyle NameStyle, + ctx context.Context, + repo *git.Repository, + roots []Root, + nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { - ctx, cancel := context.WithCancel(context.TODO()) - defer cancel() - - graph := NewGraph(rg, nameStyle) + graph := NewGraph(nameStyle) - refIter, err := repo.NewReferenceIter(ctx) - if err != nil { - return HistorySize{}, err - } - - objIter, err := repo.NewObjectIter(context.TODO()) + objIter, err := repo.NewObjectIter(ctx) if err != nil { return HistorySize{}, err } errChan := make(chan error, 1) - var refsSeen []refSeen - // Feed the references that we want into the stdin of the object - // iterator: + // Feed the references that we want to walk into the stdin of the + // object iterator: go func() { defer objIter.Close() errChan <- func() error { - for { - ref, ok, err := refIter.Next() - if err != nil { - return err - } - if !ok { - return nil - } - - walk, groups := 
rg.Categorize(ref.Refname) - - refsSeen = append( - refsSeen, - refSeen{ - Reference: ref, - walked: walk, - groups: groups, - }, - ) - - if !walk { + for _, root := range roots { + if !root.Walk() { continue } - if err := objIter.AddRoot(ref.OID); err != nil { + if err := objIter.AddRoot(root.OID()); err != nil { return err } } + return nil }() }() @@ -326,9 +270,15 @@ func ScanRepositoryUsingGraph( } progressMeter.Start("Processing references: %d") - for _, refSeen := range refsSeen { + for _, root := range roots { progressMeter.Inc() - graph.RegisterReference(refSeen.Reference, refSeen.walked, refSeen.groups) + if refRoot, ok := root.(ReferenceRoot); ok { + graph.RegisterReference(refRoot.Reference(), refRoot.Groups()) + } + + if root.Walk() { + graph.pathResolver.RecordName(root.Name(), root.OID()) + } } progressMeter.Done() @@ -337,8 +287,6 @@ func ScanRepositoryUsingGraph( // Graph is an object graph that is being built up. type Graph struct { - rg RefGrouper - blobLock sync.Mutex blobSizes map[git.OID]BlobSize @@ -361,10 +309,8 @@ type Graph struct { } // NewGraph creates and returns a new `*Graph` instance. -func NewGraph(rg RefGrouper, nameStyle NameStyle) *Graph { +func NewGraph(nameStyle NameStyle) *Graph { return &Graph{ - rg: rg, - blobSizes: make(map[git.OID]BlobSize), treeRecords: make(map[git.OID]*treeRecord), @@ -384,17 +330,18 @@ func NewGraph(rg RefGrouper, nameStyle NameStyle) *Graph { } // RegisterReference records the specified reference in `g`. -func (g *Graph) RegisterReference(ref git.Reference, walked bool, groups []RefGroupSymbol) { +func (g *Graph) RegisterReference(ref git.Reference, groups []RefGroupSymbol) { g.historyLock.Lock() g.historySize.recordReference(g, ref) for _, group := range groups { g.historySize.recordReferenceGroup(g, group) } g.historyLock.Unlock() +} - if walked { - g.pathResolver.RecordReference(ref) - } +// Register a name that can be used for the specified OID. 
+func (g *Graph) RegisterName(name string, oid git.OID) { + g.pathResolver.RecordName(name, oid) } // HistorySize returns the size data that have been collected. diff --git a/sizes/grouper.go b/sizes/grouper.go new file mode 100644 index 0000000..fdaa927 --- /dev/null +++ b/sizes/grouper.go @@ -0,0 +1,88 @@ +package sizes + +import ( + "context" + + "github.com/github/git-sizer/git" +) + +// RefGroupSymbol is the string "identifier" that is used to refer to +// a refgroup, for example in the gitconfig. Nesting of refgroups is +// inferred from their names, using "." as separator between +// components. For example, if there are three refgroups with symbols +// "tags", "tags.releases", and "foo.bar", then "tags.releases" is +// considered to be nested within "tags", and "foo.bar" is considered +// to be nested within "foo", the latter being created automatically +// if it was not configured explicitly. +type RefGroupSymbol string + +// RefGroup is a group of references, for example "branches" or +// "tags". Reference groups might overlap. +type RefGroup struct { + // Symbol is the unique string by which this `RefGroup` is + // identified and configured. It consists of dot-separated + // components, which implicitly makes a nested tree-like + // structure. + Symbol RefGroupSymbol + + // Name is the name for this `ReferenceGroup` to be presented + // in user-readable output. + Name string +} + +// RefGrouper describes a type that can collate reference names into +// groups and decide which ones to walk. +type RefGrouper interface { + // Categorize tells whether `refname` should be walked at all, + // and if so, the symbols of the reference groups to which it + // belongs. + Categorize(refname string) (bool, []RefGroupSymbol) + + // Groups returns the list of `ReferenceGroup`s, in the order + // that they should be presented. The return value might + // depend on which references have been seen so far. 
+ Groups() []RefGroup +} + +type RefRoot struct { + ref git.Reference + walk bool + groups []RefGroupSymbol +} + +func (rr RefRoot) Name() string { return rr.ref.Refname } +func (rr RefRoot) OID() git.OID { return rr.ref.OID } +func (rr RefRoot) Reference() git.Reference { return rr.ref } +func (rr RefRoot) Walk() bool { return rr.walk } +func (rr RefRoot) Groups() []RefGroupSymbol { return rr.groups } + +func CollectReferences( + ctx context.Context, repo *git.Repository, rg RefGrouper, +) ([]RefRoot, error) { + refIter, err := repo.NewReferenceIter(ctx) + if err != nil { + return nil, err + } + + var refsSeen []RefRoot + for { + ref, ok, err := refIter.Next() + if err != nil { + return nil, err + } + if !ok { + return refsSeen, nil + } + + walk, groups := rg.Categorize(ref.Refname) + + refsSeen = append( + refsSeen, + RefRoot{ + ref: ref, + walk: walk, + groups: groups, + }, + ) + } +} diff --git a/sizes/path_resolver.go b/sizes/path_resolver.go index 2a3bb1c..275d19a 100644 --- a/sizes/path_resolver.go +++ b/sizes/path_resolver.go @@ -12,15 +12,15 @@ import ( // `rev-parse` input, including commit and/or file path) by which // specified objects are reachable. It is used as follows: // -// * Request an object's path using `RequestPath()`. The returned -// `Path` object is a placeholder for the object's path. +// - Request an object's path using `RequestPath()`. The returned +// `Path` object is a placeholder for the object's path. // -// * Tell the `PathResolver` about objects that might be along the -// object's reachability path, *in depth-first* order (i.e., -// referents before referers) by calling `RecordTree()`, -// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. +// - Tell the `PathResolver` about objects that might be along the +// object's reachability path, *in depth-first* order (i.e., +// referents before referers) by calling `RecordTree()`, +// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. 
// -// * Read the path out of the `Path` object using `Path.Path()`. +// - Read the path out of the `Path` object using `Path.Path()`. // // Multiple objects can be processed at once. // @@ -34,7 +34,7 @@ import ( type PathResolver interface { RequestPath(oid git.OID, objectType string) *Path ForgetPath(p *Path) - RecordReference(ref git.Reference) + RecordName(name string, oid git.OID) RecordTreeEntry(oid git.OID, name string, childOID git.OID) RecordCommit(oid, tree git.OID) RecordTag(oid git.OID, tag *git.Tag) @@ -60,7 +60,7 @@ func (n NullPathResolver) RequestPath(oid git.OID, objectType string) *Path { func (_ NullPathResolver) ForgetPath(p *Path) {} -func (_ NullPathResolver) RecordReference(ref git.Reference) {} +func (_ NullPathResolver) RecordName(name string, oid git.OID) {} func (_ NullPathResolver) RecordTreeEntry(oid git.OID, name string, childOID git.OID) {} @@ -77,19 +77,19 @@ type InOrderPathResolver struct { // (e.g., the biggest blob, or a tree containing the biggest blob, or // a commit whose tree contains the biggest blob). Valid states: // -// * `parent == nil && relativePath == ""`—we have not yet found -// anything that refers to this object. +// - `parent == nil && relativePath == ""`—we have not yet found +// anything that refers to this object. // -// * `parent != nil && relativePath == ""`—this object is a tree, and -// we have found a commit that refers to it. +// - `parent != nil && relativePath == ""`—this object is a tree, and +// we have found a commit that refers to it. // -// * `parent == nil && relativePath != ""`—we have found a reference -// that points directly at this object; `relativePath` is the full -// name of the reference. +// - `parent == nil && relativePath != ""`—we have found a reference +// that points directly at this object; `relativePath` is the full +// name of the reference. 
// -// * `parent != nil && relativePath != ""`—this object is a blob or -// tree, and we have found another tree that refers to it; -// `relativePath` is the corresponding tree entry name. +// - `parent != nil && relativePath != ""`—this object is a blob or +// tree, and we have found another tree that refers to it; +// `relativePath` is the corresponding tree entry name. type Path struct { // The OID of the object whose path we seek. This member is always // set. @@ -122,7 +122,8 @@ type Path struct { func (p *Path) TreePrefix() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return p.parent.TreePrefix() @@ -130,7 +131,9 @@ func (p *Path) TreePrefix() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath + "/" } - } else { + case p.relativePath != "": + return p.relativePath + "/" + default: return "???" } case "commit", "tag": @@ -153,7 +156,8 @@ func (p *Path) TreePrefix() string { func (p *Path) Path() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return fmt.Sprintf("%s^{%s}", p.parent.BestPath(), p.objectType) @@ -161,7 +165,9 @@ func (p *Path) Path() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath } - } else { + case p.relativePath != "": + return p.relativePath + default: return "" } case "commit", "tag": @@ -274,18 +280,18 @@ func (pr *InOrderPathResolver) forgetPathLocked(p *Path) { } } -func (pr *InOrderPathResolver) RecordReference(ref git.Reference) { +func (pr *InOrderPathResolver) RecordName(name string, oid git.OID) { pr.lock.Lock() defer pr.lock.Unlock() - p, ok := pr.soughtPaths[ref.OID] + p, ok := pr.soughtPaths[oid] if !ok { // Nobody is looking for the path to the referent. 
return } - p.relativePath = ref.Refname - delete(pr.soughtPaths, ref.OID) + p.relativePath = name + delete(pr.soughtPaths, oid) } // Record that the tree with OID `oid` has an entry with the specified