Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit 73c775e

Browse files
authored
Merge pull request #846 from dsymonds/compact
packfile: improve Index memory representation to be more compact
2 parents d33d3ef + cf532f9 commit 73c775e

File tree

2 files changed

+67
-23
lines changed

2 files changed

+67
-23
lines changed

plumbing/format/packfile/index.go

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package packfile
22

33
import (
4+
"sort"
5+
46
"gopkg.in/src-d/go-git.v4/plumbing"
57
"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
68
)
@@ -10,7 +12,7 @@ import (
1012
// or to store them.
1113
type Index struct {
1214
byHash map[plumbing.Hash]*idxfile.Entry
13-
byOffset map[uint64]*idxfile.Entry
15+
byOffset []*idxfile.Entry // sorted by their offset
1416
}
1517

1618
// NewIndex creates a new empty index with the given size. Size is a hint and
@@ -19,36 +21,62 @@ type Index struct {
1921
func NewIndex(size int) *Index {
2022
return &Index{
2123
byHash: make(map[plumbing.Hash]*idxfile.Entry, size),
22-
byOffset: make(map[uint64]*idxfile.Entry, size),
24+
byOffset: make([]*idxfile.Entry, 0, size),
2325
}
2426
}
2527

2628
// NewIndexFromIdxFile creates a new Index from an idxfile.Idxfile.
2729
func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index {
2830
idx := &Index{
2931
byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount),
30-
byOffset: make(map[uint64]*idxfile.Entry, idxf.ObjectCount),
32+
byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount),
3133
}
3234
for _, e := range idxf.Entries {
33-
idx.add(e)
35+
idx.addUnsorted(e)
3436
}
37+
sort.Sort(orderByOffset(idx.byOffset))
3538

3639
return idx
3740
}
3841

42+
// orderByOffset is a sort.Interface adapter that arranges
43+
// a slice of entries by their offset.
44+
type orderByOffset []*idxfile.Entry
45+
46+
func (o orderByOffset) Len() int { return len(o) }
47+
func (o orderByOffset) Less(i, j int) bool { return o[i].Offset < o[j].Offset }
48+
func (o orderByOffset) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
49+
3950
// Add adds a new Entry with the given values to the index.
4051
func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
41-
e := idxfile.Entry{
52+
e := &idxfile.Entry{
4253
Hash: h,
4354
Offset: offset,
4455
CRC32: crc32,
4556
}
46-
idx.add(&e)
57+
idx.byHash[e.Hash] = e
58+
59+
// Find the right position in byOffset.
60+
// Look for the first position whose offset is *greater* than e.Offset.
61+
i := sort.Search(len(idx.byOffset), func(i int) bool {
62+
return idx.byOffset[i].Offset > offset
63+
})
64+
if i == len(idx.byOffset) {
65+
// Simple case: add it to the end.
66+
idx.byOffset = append(idx.byOffset, e)
67+
return
68+
}
69+
// Harder case: shift existing entries down by one to make room.
70+
// Append a nil entry first so we can use existing capacity in case
71+
// the index was carefully preallocated.
72+
idx.byOffset = append(idx.byOffset, nil)
73+
copy(idx.byOffset[i+1:], idx.byOffset[i:len(idx.byOffset)-1])
74+
idx.byOffset[i] = e
4775
}
4876

49-
func (idx *Index) add(e *idxfile.Entry) {
77+
func (idx *Index) addUnsorted(e *idxfile.Entry) {
5078
idx.byHash[e.Hash] = e
51-
idx.byOffset[e.Offset] = e
79+
idx.byOffset = append(idx.byOffset, e)
5280
}
5381

5482
// LookupHash looks an entry up by its hash. An idxfile.Entry is returned and
@@ -61,8 +89,13 @@ func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) {
6189
// LookupOffset looks an entry up by its offset in the packfile. An idxfile.Entry
6290
// is returned and a bool, which is true if it was found or false if it wasn't.
6391
func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) {
64-
e, ok := idx.byOffset[offset]
65-
return e, ok
92+
i := sort.Search(len(idx.byOffset), func(i int) bool {
93+
return idx.byOffset[i].Offset >= offset
94+
})
95+
if i >= len(idx.byOffset) || idx.byOffset[i].Offset != offset {
96+
return nil, false // not present
97+
}
98+
return idx.byOffset[i], true
6699
}
67100

68101
// Size returns the number of entries in the index.

plumbing/format/packfile/index_test.go

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package packfile
33
import (
44
"strconv"
55
"strings"
6+
"testing"
67

78
"gopkg.in/src-d/go-git.v4/plumbing"
89

@@ -26,12 +27,12 @@ func (s *IndexSuite) TestLookupOffset(c *C) {
2627
e, ok := idx.LookupOffset(uint64(o2))
2728
c.Assert(ok, Equals, true)
2829
c.Assert(e, NotNil)
29-
c.Assert(e.Hash, Equals, s.toHash(o2))
30+
c.Assert(e.Hash, Equals, toHash(o2))
3031
c.Assert(e.Offset, Equals, uint64(o2))
3132
}
3233
}
3334

34-
h1 := s.toHash(o1)
35+
h1 := toHash(o1)
3536
idx.Add(h1, uint64(o1), 0)
3637

3738
for o2 := 0; o2 < 10000; o2 += 100 {
@@ -43,7 +44,7 @@ func (s *IndexSuite) TestLookupOffset(c *C) {
4344
e, ok := idx.LookupOffset(uint64(o2))
4445
c.Assert(ok, Equals, true)
4546
c.Assert(e, NotNil)
46-
c.Assert(e.Hash, Equals, s.toHash(o2))
47+
c.Assert(e.Hash, Equals, toHash(o2))
4748
c.Assert(e.Offset, Equals, uint64(o2))
4849
}
4950
}
@@ -56,31 +57,31 @@ func (s *IndexSuite) TestLookupHash(c *C) {
5657
for o1 := 0; o1 < 10000; o1 += 100 {
5758
for o2 := 0; o2 < 10000; o2 += 100 {
5859
if o2 >= o1 {
59-
e, ok := idx.LookupHash(s.toHash(o2))
60+
e, ok := idx.LookupHash(toHash(o2))
6061
c.Assert(ok, Equals, false)
6162
c.Assert(e, IsNil)
6263
} else {
63-
e, ok := idx.LookupHash(s.toHash(o2))
64+
e, ok := idx.LookupHash(toHash(o2))
6465
c.Assert(ok, Equals, true)
6566
c.Assert(e, NotNil)
66-
c.Assert(e.Hash, Equals, s.toHash(o2))
67+
c.Assert(e.Hash, Equals, toHash(o2))
6768
c.Assert(e.Offset, Equals, uint64(o2))
6869
}
6970
}
7071

71-
h1 := s.toHash(o1)
72+
h1 := toHash(o1)
7273
idx.Add(h1, uint64(o1), 0)
7374

7475
for o2 := 0; o2 < 10000; o2 += 100 {
7576
if o2 > o1 {
76-
e, ok := idx.LookupHash(s.toHash(o2))
77+
e, ok := idx.LookupHash(toHash(o2))
7778
c.Assert(ok, Equals, false)
7879
c.Assert(e, IsNil)
7980
} else {
80-
e, ok := idx.LookupHash(s.toHash(o2))
81+
e, ok := idx.LookupHash(toHash(o2))
8182
c.Assert(ok, Equals, true)
8283
c.Assert(e, NotNil)
83-
c.Assert(e.Hash, Equals, s.toHash(o2))
84+
c.Assert(e.Hash, Equals, toHash(o2))
8485
c.Assert(e.Offset, Equals, uint64(o2))
8586
}
8687
}
@@ -92,7 +93,7 @@ func (s *IndexSuite) TestSize(c *C) {
9293

9394
for o1 := 0; o1 < 1000; o1++ {
9495
c.Assert(idx.Size(), Equals, o1)
95-
h1 := s.toHash(o1)
96+
h1 := toHash(o1)
9697
idx.Add(h1, uint64(o1), 0)
9798
}
9899
}
@@ -107,16 +108,26 @@ func (s *IndexSuite) TestIdxFileEmpty(c *C) {
107108
func (s *IndexSuite) TestIdxFile(c *C) {
108109
idx := NewIndex(0)
109110
for o1 := 0; o1 < 1000; o1++ {
110-
h1 := s.toHash(o1)
111+
h1 := toHash(o1)
111112
idx.Add(h1, uint64(o1), 0)
112113
}
113114

114115
idx2 := NewIndexFromIdxFile(idx.ToIdxFile())
115116
c.Assert(idx, DeepEquals, idx2)
116117
}
117118

118-
func (s *IndexSuite) toHash(i int) plumbing.Hash {
119+
func toHash(i int) plumbing.Hash {
119120
is := strconv.Itoa(i)
120121
padding := strings.Repeat("a", 40-len(is))
121122
return plumbing.NewHash(padding + is)
122123
}
124+
125+
func BenchmarkIndexConstruction(b *testing.B) {
126+
b.ReportAllocs()
127+
128+
idx := NewIndex(0)
129+
for o := 0; o < 1e6*b.N; o += 100 {
130+
h1 := toHash(o)
131+
idx.Add(h1, uint64(o), 0)
132+
}
133+
}

0 commit comments

Comments
 (0)