Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit 44a20de

Browse files
authored
Merge pull request #1121 from filipnavara/small-deltas
plumbing: packfile, apply small object reading optimization also for delta objects
2 parents 364866f + 78bab69 commit 44a20de

File tree

1 file changed

+126
-56
lines changed

1 file changed

+126
-56
lines changed

plumbing/format/packfile/packfile.go

Lines changed: 126 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -76,20 +76,18 @@ func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) {
7676
return nil, err
7777
}
7878

79-
return p.GetByOffset(offset)
79+
return p.objectAtOffset(offset, h)
8080
}
8181

82-
// GetByOffset retrieves the encoded object from the packfile with the given
82+
// GetByOffset retrieves the encoded object from the packfile at the given
8383
// offset.
8484
func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) {
8585
hash, err := p.FindHash(o)
86-
if err == nil {
87-
if obj, ok := p.deltaBaseCache.Get(hash); ok {
88-
return obj, nil
89-
}
86+
if err != nil {
87+
return nil, err
9088
}
9189

92-
return p.objectAtOffset(o)
90+
return p.objectAtOffset(o, hash)
9391
}
9492

9593
// GetSizeByOffset retrieves the size of the encoded object from the
@@ -122,6 +120,13 @@ func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) {
122120
return h, err
123121
}
124122

123+
func (p *Packfile) getDeltaObjectSize(buf *bytes.Buffer) int64 {
124+
delta := buf.Bytes()
125+
_, delta = decodeLEB128(delta) // skip src size
126+
sz, _ := decodeLEB128(delta)
127+
return int64(sz)
128+
}
129+
125130
func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) {
126131
switch h.Type {
127132
case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
@@ -135,10 +140,7 @@ func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) {
135140
return 0, err
136141
}
137142

138-
delta := buf.Bytes()
139-
_, delta = decodeLEB128(delta) // skip src size
140-
sz, _ := decodeLEB128(delta)
141-
return int64(sz), nil
143+
return p.getDeltaObjectSize(buf), nil
142144
default:
143145
return 0, ErrInvalidObject.AddDetails("type %q", h.Type)
144146
}
@@ -176,10 +178,16 @@ func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err
176178
err = ErrInvalidObject.AddDetails("type %q", h.Type)
177179
}
178180

181+
p.offsetToType[h.Offset] = typ
182+
179183
return
180184
}
181185

182-
func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error) {
186+
func (p *Packfile) objectAtOffset(offset int64, hash plumbing.Hash) (plumbing.EncodedObject, error) {
187+
if obj, ok := p.cacheGet(hash); ok {
188+
return obj, nil
189+
}
190+
183191
h, err := p.objectHeaderAtOffset(offset)
184192
if err != nil {
185193
if err == io.EOF || isInvalid(err) {
@@ -188,27 +196,54 @@ func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error)
188196
return nil, err
189197
}
190198

199+
return p.getNextObject(h, hash)
200+
}
201+
202+
func (p *Packfile) getNextObject(h *ObjectHeader, hash plumbing.Hash) (plumbing.EncodedObject, error) {
203+
var err error
204+
191205
// If we have no filesystem, we will return a MemoryObject instead
192206
// of an FSObject.
193207
if p.fs == nil {
194-
return p.getNextObject(h)
208+
return p.getNextMemoryObject(h)
195209
}
196210

197-
// If the object is not a delta and it's small enough then read it
198-
// completely into memory now since it is already read from disk
199-
// into buffer anyway.
200-
if h.Length <= smallObjectThreshold && h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject {
201-
return p.getNextObject(h)
202-
}
211+
// If the object is small enough then read it completely into memory now since
212+
// it is already read from disk into buffer anyway. For delta objects we want
213+
// to perform the optimization too, but we have to be careful about applying
214+
// small deltas on big objects.
215+
var size int64
216+
if h.Length <= smallObjectThreshold {
217+
if h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject {
218+
return p.getNextMemoryObject(h)
219+
}
203220

204-
hash, err := p.FindHash(h.Offset)
205-
if err != nil {
206-
return nil, err
207-
}
221+
// For delta objects we read the delta data and apply the small object
222+
// optimization only if the expanded version of the object still meets
223+
// the small object threshold condition.
224+
buf := bufPool.Get().(*bytes.Buffer)
225+
buf.Reset()
226+
if _, _, err := p.s.NextObject(buf); err != nil {
227+
return nil, err
228+
}
229+
defer bufPool.Put(buf)
208230

209-
size, err := p.getObjectSize(h)
210-
if err != nil {
211-
return nil, err
231+
size = p.getDeltaObjectSize(buf)
232+
if size <= smallObjectThreshold {
233+
var obj = new(plumbing.MemoryObject)
234+
obj.SetSize(size)
235+
if h.Type == plumbing.REFDeltaObject {
236+
err = p.fillREFDeltaObjectContentWithBuffer(obj, h.Reference, buf)
237+
} else {
238+
err = p.fillOFSDeltaObjectContentWithBuffer(obj, h.OffsetReference, buf)
239+
}
240+
return obj, err
241+
}
242+
} else {
243+
size, err = p.getObjectSize(h)
244+
if err != nil {
245+
return nil, err
246+
}
212247
}
213248

214249
typ, err := p.getObjectType(h)
@@ -231,33 +266,22 @@ func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error)
231266
}
232267

233268
func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
234-
ref, err := p.FindHash(offset)
235-
if err == nil {
236-
obj, ok := p.cacheGet(ref)
237-
if ok {
238-
reader, err := obj.Reader()
239-
if err != nil {
240-
return nil, err
241-
}
242-
243-
return reader, nil
244-
}
245-
}
246-
247269
h, err := p.objectHeaderAtOffset(offset)
248270
if err != nil {
249271
return nil, err
250272
}
251273

252-
obj, err := p.getNextObject(h)
274+
// getObjectContent is called from FSObject, so we have to explicitly
275+
// get a memory object here to avoid a recursive cycle.
276+
obj, err := p.getNextMemoryObject(h)
253277
if err != nil {
254278
return nil, err
255279
}
256280

257281
return obj.Reader()
258282
}
259283

260-
func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
284+
func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
261285
var obj = new(plumbing.MemoryObject)
262286
obj.SetSize(h.Length)
263287
obj.SetType(h.Type)
@@ -278,6 +302,8 @@ func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error
278302
return nil, err
279303
}
280304

305+
p.offsetToType[h.Offset] = obj.Type()
306+
281307
return obj, nil
282308
}
283309

@@ -300,6 +326,13 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu
300326
if err != nil {
301327
return err
302328
}
329+
defer bufPool.Put(buf)
330+
331+
return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf)
332+
}
333+
334+
func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error {
335+
var err error
303336

304337
base, ok := p.cacheGet(ref)
305338
if !ok {
@@ -312,30 +345,31 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu
312345
obj.SetType(base.Type())
313346
err = ApplyDelta(obj, base, buf.Bytes())
314347
p.cachePut(obj)
315-
bufPool.Put(buf)
316348

317349
return err
318350
}
319351

320352
func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) error {
321-
buf := bytes.NewBuffer(nil)
353+
buf := bufPool.Get().(*bytes.Buffer)
354+
buf.Reset()
322355
_, _, err := p.s.NextObject(buf)
323356
if err != nil {
324357
return err
325358
}
359+
defer bufPool.Put(buf)
326360

327-
var base plumbing.EncodedObject
328-
var ok bool
361+
return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf)
362+
}
363+
364+
func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error {
329365
hash, err := p.FindHash(offset)
330-
if err == nil {
331-
base, ok = p.cacheGet(hash)
366+
if err != nil {
367+
return err
332368
}
333369

334-
if !ok {
335-
base, err = p.GetByOffset(offset)
336-
if err != nil {
337-
return err
338-
}
370+
base, err := p.objectAtOffset(offset, hash)
371+
if err != nil {
372+
return err
339373
}
340374

341375
obj.SetType(base.Type())
@@ -442,14 +476,50 @@ func (i *objectIter) Next() (plumbing.EncodedObject, error) {
442476
return nil, err
443477
}
444478

445-
obj, err := i.p.GetByOffset(int64(e.Offset))
479+
if i.typ != plumbing.AnyObject {
480+
if typ, ok := i.p.offsetToType[int64(e.Offset)]; ok {
481+
if typ != i.typ {
482+
continue
483+
}
484+
} else if obj, ok := i.p.cacheGet(e.Hash); ok {
485+
if obj.Type() != i.typ {
486+
i.p.offsetToType[int64(e.Offset)] = obj.Type()
487+
continue
488+
}
489+
return obj, nil
490+
} else {
491+
h, err := i.p.objectHeaderAtOffset(int64(e.Offset))
492+
if err != nil {
493+
return nil, err
494+
}
495+
496+
if h.Type == plumbing.REFDeltaObject || h.Type == plumbing.OFSDeltaObject {
497+
typ, err := i.p.getObjectType(h)
498+
if err != nil {
499+
return nil, err
500+
}
501+
if typ != i.typ {
502+
i.p.offsetToType[int64(e.Offset)] = typ
503+
continue
504+
}
505+
// getObjectType seeks in the file, so getNextObject cannot be used safely here; re-read the object from its offset instead
506+
return i.p.objectAtOffset(int64(e.Offset), e.Hash)
507+
} else {
508+
if h.Type != i.typ {
509+
i.p.offsetToType[int64(e.Offset)] = h.Type
510+
continue
511+
}
512+
return i.p.getNextObject(h, e.Hash)
513+
}
514+
}
515+
}
516+
517+
obj, err := i.p.objectAtOffset(int64(e.Offset), e.Hash)
446518
if err != nil {
447519
return nil, err
448520
}
449521

450-
if i.typ == plumbing.AnyObject || obj.Type() == i.typ {
451-
return obj, nil
452-
}
522+
return obj, nil
453523
}
454524
}
455525

0 commit comments

Comments
 (0)