Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit b0f6b47

Browse files
authored
Merge pull request #669 from keybase/strib/gh-gc
storage/repository: add new functions for garbage collection
2 parents 174fd8e + d532648 commit b0f6b47

15 files changed

+928
-74
lines changed

object_walker.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package git
2+
3+
import (
4+
"fmt"
5+
6+
"gopkg.in/src-d/go-git.v4/plumbing"
7+
"gopkg.in/src-d/go-git.v4/plumbing/filemode"
8+
"gopkg.in/src-d/go-git.v4/plumbing/object"
9+
"gopkg.in/src-d/go-git.v4/storage"
10+
)
11+
12+
type objectWalker struct {
13+
Storer storage.Storer
14+
// seen is the set of objects seen in the repo.
15+
// seen map can become huge if walking over large
16+
// repos. Thus using struct{} as the value type.
17+
seen map[plumbing.Hash]struct{}
18+
}
19+
20+
func newObjectWalker(s storage.Storer) *objectWalker {
21+
return &objectWalker{s, map[plumbing.Hash]struct{}{}}
22+
}
23+
24+
// walkAllRefs walks all (hash) refererences from the repo.
25+
func (p *objectWalker) walkAllRefs() error {
26+
// Walk over all the references in the repo.
27+
it, err := p.Storer.IterReferences()
28+
if err != nil {
29+
return err
30+
}
31+
defer it.Close()
32+
err = it.ForEach(func(ref *plumbing.Reference) error {
33+
// Exit this iteration early for non-hash references.
34+
if ref.Type() != plumbing.HashReference {
35+
return nil
36+
}
37+
return p.walkObjectTree(ref.Hash())
38+
})
39+
if err != nil {
40+
return err
41+
}
42+
return nil
43+
}
44+
45+
func (p *objectWalker) isSeen(hash plumbing.Hash) bool {
46+
_, seen := p.seen[hash]
47+
return seen
48+
}
49+
50+
func (p *objectWalker) add(hash plumbing.Hash) {
51+
p.seen[hash] = struct{}{}
52+
}
53+
54+
// walkObjectTree walks over all objects and remembers references
55+
// to them in the objectWalker. This is used instead of the revlist
56+
// walks because memory usage is tight with huge repos.
57+
func (p *objectWalker) walkObjectTree(hash plumbing.Hash) error {
58+
// Check if we have already seen, and mark this object
59+
if p.isSeen(hash) {
60+
return nil
61+
}
62+
p.add(hash)
63+
// Fetch the object.
64+
obj, err := object.GetObject(p.Storer, hash)
65+
if err != nil {
66+
return fmt.Errorf("Getting object %s failed: %v", hash, err)
67+
}
68+
// Walk all children depending on object type.
69+
switch obj := obj.(type) {
70+
case *object.Commit:
71+
err = p.walkObjectTree(obj.TreeHash)
72+
if err != nil {
73+
return err
74+
}
75+
for _, h := range obj.ParentHashes {
76+
err = p.walkObjectTree(h)
77+
if err != nil {
78+
return err
79+
}
80+
}
81+
case *object.Tree:
82+
for i := range obj.Entries {
83+
// Shortcut for blob objects:
84+
// 'or' the lower bits of a mode and check that it
85+
// it matches a filemode.Executable. The type information
86+
// is in the higher bits, but this is the cleanest way
87+
// to handle plain files with different modes.
88+
// Other non-tree objects are somewhat rare, so they
89+
// are not special-cased.
90+
if obj.Entries[i].Mode|0755 == filemode.Executable {
91+
p.add(obj.Entries[i].Hash)
92+
continue
93+
}
94+
// Normal walk for sub-trees (and symlinks etc).
95+
err = p.walkObjectTree(obj.Entries[i].Hash)
96+
if err != nil {
97+
return err
98+
}
99+
}
100+
default:
101+
// Error out on unhandled object types.
102+
return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj)
103+
}
104+
return nil
105+
}

plumbing/storer/object.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package storer
33
import (
44
"errors"
55
"io"
6+
"time"
67

78
"gopkg.in/src-d/go-git.v4/plumbing"
89
)
@@ -36,6 +37,9 @@ type EncodedObjectStorer interface {
3637
//
3738
// Valid plumbing.ObjectType values are CommitObject, BlobObject, TagObject,
3839
IterEncodedObjects(plumbing.ObjectType) (EncodedObjectIter, error)
40+
// HasEncodedObject returns ErrObjectNotFound if the object doesn't
41+
// exist. If the object does exist, it returns nil.
42+
HasEncodedObject(plumbing.Hash) error
3943
}
4044

4145
// DeltaObjectStorer is an EncodedObjectStorer that can return delta
@@ -53,6 +57,34 @@ type Transactioner interface {
5357
Begin() Transaction
5458
}
5559

60+
// LooseObjectStorer is an optional interface for managing "loose"
61+
// objects, i.e. those not in packfiles.
62+
type LooseObjectStorer interface {
63+
// ForEachObjectHash iterates over all the (loose) object hashes
64+
// in the repository without necessarily having to read those objects.
65+
// Objects only inside pack files may be omitted.
66+
// If ErrStop is sent the iteration is stop but no error is returned.
67+
ForEachObjectHash(func(plumbing.Hash) error) error
68+
// LooseObjectTime looks up the (m)time associated with the
69+
// loose object (that is not in a pack file). Some
70+
// implementations (e.g. without loose objects)
71+
// always return an error.
72+
LooseObjectTime(plumbing.Hash) (time.Time, error)
73+
// DeleteLooseObject deletes a loose object if it exists.
74+
DeleteLooseObject(plumbing.Hash) error
75+
}
76+
77+
// PackedObjectStorer is an optional interface for managing objects in
78+
// packfiles.
79+
type PackedObjectStorer interface {
80+
// ObjectPacks returns hashes of object packs if the underlying
81+
// implementation has pack files.
82+
ObjectPacks() ([]plumbing.Hash, error)
83+
// DeleteOldObjectPackAndIndex deletes an object pack and the corresponding index file if they exist.
84+
// Deletion is only performed if the pack is older than the supplied time (or the time is zero).
85+
DeleteOldObjectPackAndIndex(plumbing.Hash, time.Time) error
86+
}
87+
5688
// PackfileWriter is an optional method for ObjectStorer; it enables direct writes
5789
// of packfile to the storage
5890
type PackfileWriter interface {

plumbing/storer/object_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,15 @@ func (o *MockObjectStorage) SetEncodedObject(obj plumbing.EncodedObject) (plumbi
132132
return plumbing.ZeroHash, nil
133133
}
134134

135+
func (o *MockObjectStorage) HasEncodedObject(h plumbing.Hash) error {
136+
for _, o := range o.db {
137+
if o.Hash() == h {
138+
return nil
139+
}
140+
}
141+
return plumbing.ErrObjectNotFound
142+
}
143+
135144
func (o *MockObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) {
136145
for _, o := range o.db {
137146
if o.Hash() == h {

plumbing/storer/reference.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ type ReferenceStorer interface {
2424
Reference(plumbing.ReferenceName) (*plumbing.Reference, error)
2525
IterReferences() (ReferenceIter, error)
2626
RemoveReference(plumbing.ReferenceName) error
27+
CountLooseRefs() (int, error)
28+
PackRefs() error
2729
}
2830

2931
// ReferenceIter is a generic closable interface for iterating over references.

prune.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package git
2+
3+
import (
4+
"errors"
5+
"time"
6+
7+
"gopkg.in/src-d/go-git.v4/plumbing"
8+
"gopkg.in/src-d/go-git.v4/plumbing/storer"
9+
)
10+
11+
// PruneHandler is the callback type used by PruneOptions.Handler. Prune
// calls it with the hash of each unreferenced object that matches the
// options; a non-nil error is returned to the object iteration.
type PruneHandler func(unreferencedObjectHash plumbing.Hash) error
12+
type PruneOptions struct {
13+
// OnlyObjectsOlderThan if set to non-zero value
14+
// selects only objects older than the time provided.
15+
OnlyObjectsOlderThan time.Time
16+
// Handler is called on matching objects
17+
Handler PruneHandler
18+
}
19+
20+
// ErrLooseObjectsNotSupported is returned by DeleteObject and Prune when
// the repository's Storer does not implement storer.LooseObjectStorer.
var ErrLooseObjectsNotSupported = errors.New("Loose objects not supported")
21+
22+
// DeleteObject deletes an object from a repository.
23+
// The type conveniently matches PruneHandler.
24+
func (r *Repository) DeleteObject(hash plumbing.Hash) error {
25+
los, ok := r.Storer.(storer.LooseObjectStorer)
26+
if !ok {
27+
return ErrLooseObjectsNotSupported
28+
}
29+
30+
return los.DeleteLooseObject(hash)
31+
}
32+
33+
func (r *Repository) Prune(opt PruneOptions) error {
34+
los, ok := r.Storer.(storer.LooseObjectStorer)
35+
if !ok {
36+
return ErrLooseObjectsNotSupported
37+
}
38+
39+
pw := newObjectWalker(r.Storer)
40+
err := pw.walkAllRefs()
41+
if err != nil {
42+
return err
43+
}
44+
// Now walk all (loose) objects in storage.
45+
return los.ForEachObjectHash(func(hash plumbing.Hash) error {
46+
// Get out if we have seen this object.
47+
if pw.isSeen(hash) {
48+
return nil
49+
}
50+
// Otherwise it is a candidate for pruning.
51+
// Check out for too new objects next.
52+
if opt.OnlyObjectsOlderThan != (time.Time{}) {
53+
// Errors here are non-fatal. The object may be e.g. packed.
54+
// Or concurrently deleted. Skip such objects.
55+
t, err := los.LooseObjectTime(hash)
56+
if err != nil {
57+
return nil
58+
}
59+
// Skip too new objects.
60+
if !t.Before(opt.OnlyObjectsOlderThan) {
61+
return nil
62+
}
63+
}
64+
return opt.Handler(hash)
65+
})
66+
}

prune_test.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
package git
2+
3+
import (
4+
"time"
5+
6+
"gopkg.in/src-d/go-git.v4/plumbing"
7+
"gopkg.in/src-d/go-git.v4/plumbing/storer"
8+
"gopkg.in/src-d/go-git.v4/storage"
9+
"gopkg.in/src-d/go-git.v4/storage/filesystem"
10+
11+
. "gopkg.in/check.v1"
12+
"gopkg.in/src-d/go-git-fixtures.v3"
13+
)
14+
15+
type PruneSuite struct {
16+
BaseSuite
17+
}
18+
19+
// Register PruneSuite with the gocheck runner.
var _ = Suite(&PruneSuite{})
20+
21+
func (s *PruneSuite) testPrune(c *C, deleteTime time.Time) {
22+
srcFs := fixtures.ByTag("unpacked").One().DotGit()
23+
var sto storage.Storer
24+
var err error
25+
sto, err = filesystem.NewStorage(srcFs)
26+
c.Assert(err, IsNil)
27+
28+
los := sto.(storer.LooseObjectStorer)
29+
c.Assert(los, NotNil)
30+
31+
count := 0
32+
err = los.ForEachObjectHash(func(_ plumbing.Hash) error {
33+
count++
34+
return nil
35+
})
36+
c.Assert(err, IsNil)
37+
38+
r, err := Open(sto, srcFs)
39+
c.Assert(err, IsNil)
40+
c.Assert(r, NotNil)
41+
42+
// Remove a branch so we can prune some objects.
43+
err = sto.RemoveReference(plumbing.ReferenceName("refs/heads/v4"))
44+
c.Assert(err, IsNil)
45+
err = sto.RemoveReference(plumbing.ReferenceName("refs/remotes/origin/v4"))
46+
c.Assert(err, IsNil)
47+
48+
err = r.Prune(PruneOptions{
49+
OnlyObjectsOlderThan: deleteTime,
50+
Handler: r.DeleteObject,
51+
})
52+
c.Assert(err, IsNil)
53+
54+
newCount := 0
55+
err = los.ForEachObjectHash(func(_ plumbing.Hash) error {
56+
newCount++
57+
return nil
58+
})
59+
if deleteTime.IsZero() {
60+
c.Assert(newCount < count, Equals, true)
61+
} else {
62+
// Assume a delete time older than any of the objects was passed in.
63+
c.Assert(newCount, Equals, count)
64+
}
65+
}
66+
67+
func (s *PruneSuite) TestPrune(c *C) {
68+
s.testPrune(c, time.Time{})
69+
}
70+
71+
func (s *PruneSuite) TestPruneWithNoDelete(c *C) {
72+
s.testPrune(c, time.Unix(0, 1))
73+
}

0 commit comments

Comments
 (0)