Skip to content

Commit ae7e6cd

Browse files
authored
Reduce calls to git cat-file -s (#14682)
* Reduce calls to git cat-file -s

  There are multiple places where `git cat-file -s` is called repeatedly because blobs are not created with their size. Through judicious use of `git ls-tree -l` and slight adjustments to the indexer code, we can avoid a lot of these calls.

* Simplify by always expecting the long format.

* Also always set the sized field and tell the indexer the update is sized.
1 parent 7ba1581 commit ae7e6cd

9 files changed

+141
-28
lines changed

modules/git/parse_gogit.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,21 @@ import (
1010
"bytes"
1111
"fmt"
1212
"strconv"
13+
"strings"
1314

1415
"github.com/go-git/go-git/v5/plumbing/filemode"
1516
"github.com/go-git/go-git/v5/plumbing/object"
1617
)
1718

18-
// ParseTreeEntries parses the output of a `git ls-tree` command.
19+
// ParseTreeEntries parses the output of a `git ls-tree -l` command.
1920
func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
2021
return parseTreeEntries(data, nil)
2122
}
2223

2324
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
2425
entries := make([]*TreeEntry, 0, 10)
2526
for pos := 0; pos < len(data); {
26-
// expect line to be of the form "<mode> <type> <sha>\t<filename>"
27+
// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
2728
entry := new(TreeEntry)
2829
entry.gogitTreeEntry = &object.TreeEntry{}
2930
entry.ptree = ptree
@@ -61,7 +62,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
6162
entry.gogitTreeEntry.Hash = id
6263
pos += 41 // skip over sha and trailing space
6364

64-
end := pos + bytes.IndexByte(data[pos:], '\n')
65+
end := pos + bytes.IndexByte(data[pos:], '\t')
66+
if end < pos {
67+
return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
68+
}
69+
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
70+
entry.sized = true
71+
72+
pos = end + 1
73+
74+
end = pos + bytes.IndexByte(data[pos:], '\n')
6575
if end < pos {
6676
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
6777
}

modules/git/parse_gogit_test.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ func TestParseTreeEntries(t *testing.T) {
2424
Expected: []*TreeEntry{},
2525
},
2626
{
27-
Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\texample/file2.txt\n",
27+
Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c 1022\texample/file2.txt\n",
2828
Expected: []*TreeEntry{
2929
{
3030
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
@@ -33,12 +33,14 @@ func TestParseTreeEntries(t *testing.T) {
3333
Name: "example/file2.txt",
3434
Mode: filemode.Regular,
3535
},
36+
size: 1022,
37+
sized: true,
3638
},
3739
},
3840
},
3941
{
40-
Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\t\"example/\\n.txt\"\n" +
41-
"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8\texample\n",
42+
Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c 234131\t\"example/\\n.txt\"\n" +
43+
"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8 -\texample\n",
4244
Expected: []*TreeEntry{
4345
{
4446
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
@@ -47,9 +49,12 @@ func TestParseTreeEntries(t *testing.T) {
4749
Name: "example/\n.txt",
4850
Mode: filemode.Symlink,
4951
},
52+
size: 234131,
53+
sized: true,
5054
},
5155
{
52-
ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
56+
ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
57+
sized: true,
5358
gogitTreeEntry: &object.TreeEntry{
5459
Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
5560
Name: "example",

modules/git/parse_nogogit.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,18 @@ import (
1010
"bytes"
1111
"fmt"
1212
"strconv"
13+
"strings"
1314
)
1415

15-
// ParseTreeEntries parses the output of a `git ls-tree` command.
16+
// ParseTreeEntries parses the output of a `git ls-tree -l` command.
1617
func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
1718
return parseTreeEntries(data, nil)
1819
}
1920

2021
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
2122
entries := make([]*TreeEntry, 0, 10)
2223
for pos := 0; pos < len(data); {
23-
// expect line to be of the form "<mode> <type> <sha>\t<filename>"
24+
// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
2425
entry := new(TreeEntry)
2526
entry.ptree = ptree
2627
if pos+6 > len(data) {
@@ -56,7 +57,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
5657
entry.ID = id
5758
pos += 41 // skip over sha and trailing space
5859

59-
end := pos + bytes.IndexByte(data[pos:], '\n')
60+
end := pos + bytes.IndexByte(data[pos:], '\t')
61+
if end < pos {
62+
return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
63+
}
64+
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
65+
entry.sized = true
66+
67+
pos = end + 1
68+
69+
end = pos + bytes.IndexByte(data[pos:], '\n')
6070
if end < pos {
6171
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
6272
}

modules/git/parse_nogogit_test.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// Copyright 2021 The Gitea Authors. All rights reserved.
2+
// Use of this source code is governed by a MIT-style
3+
// license that can be found in the LICENSE file.
4+
5+
// +build !gogit
6+
7+
package git
8+
9+
import (
10+
"testing"
11+
12+
"github.com/stretchr/testify/assert"
13+
)
14+
15+
func TestParseTreeEntries(t *testing.T) {
16+
17+
testCases := []struct {
18+
Input string
19+
Expected []*TreeEntry
20+
}{
21+
{
22+
Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af 8218 README.md
23+
100644 blob 037f27dc9d353ae4fd50f0474b2194c593914e35 4681 README_ZH.md
24+
100644 blob 9846a94f7e8350a916632929d0fda38c90dd2ca8 429 SECURITY.md
25+
040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d - assets
26+
`,
27+
Expected: []*TreeEntry{
28+
{
29+
ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"),
30+
name: "README.md",
31+
entryMode: EntryModeBlob,
32+
size: 8218,
33+
sized: true,
34+
},
35+
{
36+
ID: MustIDFromString("037f27dc9d353ae4fd50f0474b2194c593914e35"),
37+
name: "README_ZH.md",
38+
entryMode: EntryModeBlob,
39+
size: 4681,
40+
sized: true,
41+
},
42+
{
43+
ID: MustIDFromString("9846a94f7e8350a916632929d0fda38c90dd2ca8"),
44+
name: "SECURITY.md",
45+
entryMode: EntryModeBlob,
46+
size: 429,
47+
sized: true,
48+
},
49+
{
50+
ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"),
51+
name: "assets",
52+
entryMode: EntryModeTree,
53+
sized: true,
54+
},
55+
},
56+
},
57+
}
58+
for _, testCase := range testCases {
59+
entries, err := ParseTreeEntries([]byte(testCase.Input))
60+
assert.NoError(t, err)
61+
assert.EqualValues(t, len(testCase.Expected), len(entries))
62+
for i, entry := range entries {
63+
assert.EqualValues(t, testCase.Expected[i].ID, entry.ID)
64+
assert.EqualValues(t, testCase.Expected[i].name, entry.name)
65+
assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode)
66+
assert.EqualValues(t, testCase.Expected[i].sized, entry.sized)
67+
assert.EqualValues(t, testCase.Expected[i].size, entry.size)
68+
}
69+
}
70+
}

modules/git/tree_entry_nogogit.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,5 +87,7 @@ func (te *TreeEntry) Blob() *Blob {
8787
ID: te.ID,
8888
repoPath: te.ptree.repo.Path,
8989
name: te.Name(),
90+
size: te.size,
91+
gotSize: te.sized,
9092
}
9193
}

modules/git/tree_nogogit.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ func (t *Tree) ListEntries() (Entries, error) {
3232
return t.entries, nil
3333
}
3434

35-
stdout, err := NewCommand("ls-tree", t.ID.String()).RunInDirBytes(t.repo.Path)
35+
stdout, err := NewCommand("ls-tree", "-l", t.ID.String()).RunInDirBytes(t.repo.Path)
3636
if err != nil {
3737
if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") {
3838
return nil, ErrNotExist{
@@ -55,7 +55,7 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {
5555
if t.entriesRecursiveParsed {
5656
return t.entriesRecursive, nil
5757
}
58-
stdout, err := NewCommand("ls-tree", "-t", "-r", t.ID.String()).RunInDirBytes(t.repo.Path)
58+
stdout, err := NewCommand("ls-tree", "-t", "-l", "-r", t.ID.String()).RunInDirBytes(t.repo.Path)
5959
if err != nil {
6060
return nil, err
6161
}

modules/indexer/code/bleve.go

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -179,14 +179,20 @@ func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *mode
179179
return nil
180180
}
181181

182-
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
183-
RunInDir(repo.RepoPath())
184-
if err != nil {
185-
return err
182+
size := update.Size
183+
184+
if !update.Sized {
185+
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
186+
RunInDir(repo.RepoPath())
187+
if err != nil {
188+
return err
189+
}
190+
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
191+
return fmt.Errorf("Misformatted git cat-file output: %v", err)
192+
}
186193
}
187-
if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
188-
return fmt.Errorf("Misformatted git cat-file output: %v", err)
189-
} else if int64(size) > setting.Indexer.MaxIndexerFileSize {
194+
195+
if size > setting.Indexer.MaxIndexerFileSize {
190196
return b.addDelete(update.Filename, repo, batch)
191197
}
192198

modules/indexer/code/elastic_search.go

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -178,14 +178,20 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo
178178
return nil, nil
179179
}
180180

181-
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
182-
RunInDir(repo.RepoPath())
183-
if err != nil {
184-
return nil, err
181+
size := update.Size
182+
183+
if !update.Sized {
184+
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
185+
RunInDir(repo.RepoPath())
186+
if err != nil {
187+
return nil, err
188+
}
189+
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
190+
return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
191+
}
185192
}
186-
if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
187-
return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
188-
} else if int64(size) > setting.Indexer.MaxIndexerFileSize {
193+
194+
if size > setting.Indexer.MaxIndexerFileSize {
189195
return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil
190196
}
191197

modules/indexer/code/git.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ import (
1717
type fileUpdate struct {
1818
Filename string
1919
BlobSha string
20+
Size int64
21+
Sized bool
2022
}
2123

2224
// repoChanges changes (file additions/updates/removals) to a repo
@@ -77,6 +79,8 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
7779
updates[idxCount] = fileUpdate{
7880
Filename: entry.Name(),
7981
BlobSha: entry.ID.String(),
82+
Size: entry.Size(),
83+
Sized: true,
8084
}
8185
idxCount++
8286
}
@@ -87,7 +91,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
8791
// genesisChanges get changes to add repo to the indexer for the first time
8892
func genesisChanges(repo *models.Repository, revision string) (*repoChanges, error) {
8993
var changes repoChanges
90-
stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision).
94+
stdout, err := git.NewCommand("ls-tree", "--full-tree", "-l", "-r", revision).
9195
RunInDirBytes(repo.RepoPath())
9296
if err != nil {
9397
return nil, err
@@ -162,7 +166,7 @@ func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges,
162166
}
163167
}
164168

165-
cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--")
169+
cmd := git.NewCommand("ls-tree", "--full-tree", "-l", revision, "--")
166170
cmd.AddArguments(updatedFilenames...)
167171
lsTreeStdout, err := cmd.RunInDirBytes(repo.RepoPath())
168172
if err != nil {

0 commit comments

Comments
 (0)