Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit c5f1056

Browse files
ajnavarromcuadros
authored andcommitted
packfile/decoder: speed up packfile iterator when specific type (#200)
1 parent a2ecbbc commit c5f1056

File tree

4 files changed

+204
-28
lines changed

4 files changed

+204
-28
lines changed

plumbing/format/packfile/decoder.go

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ type Decoder struct {
5959
offsetToHash map[int64]plumbing.Hash
6060
hashToOffset map[plumbing.Hash]int64
6161
crcs map[plumbing.Hash]uint32
62+
63+
offsetToType map[int64]plumbing.ObjectType
64+
decoderType plumbing.ObjectType
6265
}
6366

6467
// NewDecoder returns a new Decoder that decodes a Packfile using the given
@@ -72,6 +75,22 @@ type Decoder struct {
7275
// If the ObjectStorer implements storer.Transactioner, a transaction is created
7376
// during the Decode execution, if something fails the Rollback is called
7477
func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) {
78+
return NewDecoderForType(s, o, plumbing.AnyObject)
79+
}
80+
81+
// NewDecoderForType returns a new Decoder but in this case for a specific object type.
82+
// When an object is read using this Decoder instance and it is not of the same type as
83+
// the specified one, nil will be returned. This is intended to avoid the content
84+
// deserialization of all the objects
85+
func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer,
86+
t plumbing.ObjectType) (*Decoder, error) {
87+
88+
if t == plumbing.OFSDeltaObject ||
89+
t == plumbing.REFDeltaObject ||
90+
t == plumbing.InvalidObject {
91+
return nil, plumbing.ErrInvalidType
92+
}
93+
7594
if !canResolveDeltas(s, o) {
7695
return nil, ErrResolveDeltasNotSupported
7796
}
@@ -83,6 +102,9 @@ func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) {
83102
offsetToHash: make(map[int64]plumbing.Hash, 0),
84103
hashToOffset: make(map[plumbing.Hash]int64, 0),
85104
crcs: make(map[plumbing.Hash]uint32, 0),
105+
106+
offsetToType: make(map[int64]plumbing.ObjectType, 0),
107+
decoderType: t,
86108
}, nil
87109
}
88110

@@ -174,17 +196,82 @@ func (d *Decoder) decodeObjectsWithObjectStorerTx(count int) error {
174196

175197
// DecodeObject reads the next object from the scanner and returns it. This
176198
// method can be used in replacement of the Decode method, to work in a
177-
// interative way
199+
// iterative way. If you created a new decoder instance using NewDecoderForType
200+
// constructor and the decoded object's type does not match the specified one, nil will
201+
// be returned
178202
func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) {
179203
h, err := d.s.NextObjectHeader()
180204
if err != nil {
181205
return nil, err
182206
}
183207

208+
if d.decoderType == plumbing.AnyObject {
209+
return d.decodeByHeader(h)
210+
}
211+
212+
return d.decodeIfSpecificType(h)
213+
}
214+
215+
func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, error) {
216+
var realType plumbing.ObjectType
217+
var err error
218+
switch h.Type {
219+
case plumbing.OFSDeltaObject:
220+
realType, err = d.ofsDeltaType(h.OffsetReference)
221+
case plumbing.REFDeltaObject:
222+
realType, err = d.refDeltaType(h.Reference)
223+
224+
// If a reference delta is not found, it means that it isn't of
225+
// the type we are looking for, because we don't have any reference
226+
// and it is not present into the object storer
227+
if err == plumbing.ErrObjectNotFound {
228+
return nil, nil
229+
}
230+
default:
231+
realType = h.Type
232+
}
233+
234+
if err != nil {
235+
return nil, err
236+
}
237+
238+
d.offsetToType[h.Offset] = realType
239+
240+
if d.decoderType == realType {
241+
return d.decodeByHeader(h)
242+
}
243+
244+
return nil, nil
245+
}
246+
247+
func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) {
248+
t, ok := d.offsetToType[offset]
249+
if !ok {
250+
return plumbing.InvalidObject, plumbing.ErrObjectNotFound
251+
}
252+
253+
return t, nil
254+
}
255+
256+
func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) {
257+
if o, ok := d.hashToOffset[ref]; ok {
258+
return d.ofsDeltaType(o)
259+
}
260+
261+
obj, err := d.o.EncodedObject(plumbing.AnyObject, ref)
262+
if err != nil {
263+
return plumbing.InvalidObject, err
264+
}
265+
266+
return obj.Type(), nil
267+
}
268+
269+
func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) {
184270
obj := d.newObject()
185271
obj.SetSize(h.Length)
186272
obj.SetType(h.Type)
187273
var crc uint32
274+
var err error
188275
switch h.Type {
189276
case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
190277
crc, err = d.fillRegularObjectContent(obj)

plumbing/format/packfile/decoder_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,52 @@ func (s *ReaderSuite) TestDecode(c *C) {
4747
})
4848
}
4949

50+
func (s *ReaderSuite) TestDecodeByType(c *C) {
51+
ts := []plumbing.ObjectType{
52+
plumbing.CommitObject,
53+
plumbing.TagObject,
54+
plumbing.TreeObject,
55+
plumbing.BlobObject,
56+
}
57+
58+
fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
59+
for _, t := range ts {
60+
storage := memory.NewStorage()
61+
scanner := packfile.NewScanner(f.Packfile())
62+
d, err := packfile.NewDecoderForType(scanner, storage, t)
63+
c.Assert(err, IsNil)
64+
defer d.Close()
65+
66+
_, count, err := scanner.Header()
67+
c.Assert(err, IsNil)
68+
69+
var i uint32
70+
for i = 0; i < count; i++ {
71+
obj, err := d.DecodeObject()
72+
c.Assert(err, IsNil)
73+
74+
if obj != nil {
75+
c.Assert(obj.Type(), Equals, t)
76+
}
77+
}
78+
}
79+
})
80+
}
81+
func (s *ReaderSuite) TestDecodeByTypeConstructor(c *C) {
82+
f := fixtures.Basic().ByTag("packfile").One()
83+
storage := memory.NewStorage()
84+
scanner := packfile.NewScanner(f.Packfile())
85+
86+
_, err := packfile.NewDecoderForType(scanner, storage, plumbing.OFSDeltaObject)
87+
c.Assert(err, Equals, plumbing.ErrInvalidType)
88+
89+
_, err = packfile.NewDecoderForType(scanner, storage, plumbing.REFDeltaObject)
90+
c.Assert(err, Equals, plumbing.ErrInvalidType)
91+
92+
_, err = packfile.NewDecoderForType(scanner, storage, plumbing.InvalidObject)
93+
c.Assert(err, Equals, plumbing.ErrInvalidType)
94+
}
95+
5096
func (s *ReaderSuite) TestDecodeMultipleTimes(c *C) {
5197
f := fixtures.Basic().ByTag("packfile").One()
5298
scanner := packfile.NewScanner(f.Packfile())

storage/filesystem/object.go

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -271,14 +271,18 @@ type packfileIter struct {
271271
total uint32
272272
}
273273

274+
func NewPackfileIter(f billy.File, t plumbing.ObjectType) (storer.EncodedObjectIter, error) {
275+
return newPackfileIter(f, t, make(map[plumbing.Hash]bool))
276+
}
277+
274278
func newPackfileIter(f billy.File, t plumbing.ObjectType, seen map[plumbing.Hash]bool) (storer.EncodedObjectIter, error) {
275279
s := packfile.NewScanner(f)
276280
_, total, err := s.Header()
277281
if err != nil {
278282
return nil, err
279283
}
280284

281-
d, err := packfile.NewDecoder(s, memory.NewStorage())
285+
d, err := packfile.NewDecoderForType(s, memory.NewStorage(), t)
282286
if err != nil {
283287
return nil, err
284288
}
@@ -294,25 +298,27 @@ func newPackfileIter(f billy.File, t plumbing.ObjectType, seen map[plumbing.Hash
294298
}
295299

296300
func (iter *packfileIter) Next() (plumbing.EncodedObject, error) {
297-
if iter.position >= iter.total {
298-
return nil, io.EOF
299-
}
301+
for {
302+
if iter.position >= iter.total {
303+
return nil, io.EOF
304+
}
300305

301-
obj, err := iter.d.DecodeObject()
302-
if err != nil {
303-
return nil, err
304-
}
306+
obj, err := iter.d.DecodeObject()
307+
if err != nil {
308+
return nil, err
309+
}
305310

306-
iter.position++
307-
if iter.seen[obj.Hash()] {
308-
return iter.Next()
309-
}
311+
iter.position++
312+
if obj == nil {
313+
continue
314+
}
310315

311-
if iter.t != plumbing.AnyObject && iter.t != obj.Type() {
312-
return iter.Next()
313-
}
316+
if iter.seen[obj.Hash()] {
317+
return iter.Next()
318+
}
314319

315-
return obj, nil
320+
return obj, nil
321+
}
316322
}
317323

318324
// ForEach is never called since it is used inside of a MultiObjectIterator

storage/filesystem/object_test.go

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,17 @@ import (
1010

1111
type FsSuite struct {
1212
fixtures.Suite
13+
Types []plumbing.ObjectType
1314
}
1415

15-
var _ = Suite(&FsSuite{})
16+
var _ = Suite(&FsSuite{
17+
Types: []plumbing.ObjectType{
18+
plumbing.CommitObject,
19+
plumbing.TagObject,
20+
plumbing.TreeObject,
21+
plumbing.BlobObject,
22+
},
23+
})
1624

1725
func (s *FsSuite) TestGetFromObjectFile(c *C) {
1826
fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit()
@@ -76,18 +84,47 @@ func (s *FsSuite) TestIter(c *C) {
7684

7785
func (s *FsSuite) TestIterWithType(c *C) {
7886
fixtures.ByTag(".git").Test(c, func(f *fixtures.Fixture) {
79-
fs := f.DotGit()
80-
o, err := newObjectStorage(dotgit.New(fs))
81-
c.Assert(err, IsNil)
87+
for _, t := range s.Types {
88+
fs := f.DotGit()
89+
o, err := newObjectStorage(dotgit.New(fs))
90+
c.Assert(err, IsNil)
8291

83-
iter, err := o.IterEncodedObjects(plumbing.CommitObject)
84-
c.Assert(err, IsNil)
92+
iter, err := o.IterEncodedObjects(t)
93+
c.Assert(err, IsNil)
8594

86-
err = iter.ForEach(func(o plumbing.EncodedObject) error {
87-
c.Assert(o.Type(), Equals, plumbing.CommitObject)
88-
return nil
89-
})
95+
err = iter.ForEach(func(o plumbing.EncodedObject) error {
96+
c.Assert(o.Type(), Equals, t)
97+
return nil
98+
})
99+
100+
c.Assert(err, IsNil)
101+
}
90102

91-
c.Assert(err, IsNil)
92103
})
93104
}
105+
106+
func (s *FsSuite) TestPackfileIter(c *C) {
107+
fixtures.ByTag(".git").Test(c, func(f *fixtures.Fixture) {
108+
fs := f.DotGit()
109+
dg := dotgit.New(fs)
110+
111+
for _, t := range s.Types {
112+
ph, err := dg.ObjectPacks()
113+
c.Assert(err, IsNil)
114+
115+
for _, h := range ph {
116+
f, err := dg.ObjectPack(h)
117+
c.Assert(err, IsNil)
118+
iter, err := NewPackfileIter(f, t)
119+
c.Assert(err, IsNil)
120+
err = iter.ForEach(func(o plumbing.EncodedObject) error {
121+
c.Assert(o.Type(), Equals, t)
122+
return nil
123+
})
124+
125+
c.Assert(err, IsNil)
126+
}
127+
}
128+
})
129+
130+
}

0 commit comments

Comments
 (0)