Skip to content
This repository was archived by the owner on Apr 12, 2019. It is now read-only.

Commit ab9e103

Browse files
committed
Faster implementation of GetCommitsInfo
1 parent f0a094c commit ab9e103

File tree

4 files changed

+198
-95
lines changed

4 files changed

+198
-95
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,5 @@ _testmain.go
2424
*.prof
2525

2626
coverage.out
27+
28+
benchmark_repos/

Makefile

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
IMPORT := code.gitea.io/git
22

3-
PACKAGES ?= $(shell go list ./... | grep -v /vendor/)
3+
PACKAGES ?= $(shell go list ./... | grep -v /vendor/ | grep -v /benchmark_repos/)
44
GENERATE ?= code.gitea.io/git
55

66
.PHONY: all
@@ -18,7 +18,7 @@ generate:
1818

1919
.PHONY: fmt
2020
fmt:
21-
find . -name "*.go" -type f -not -path "./vendor/*" | xargs gofmt -s -w
21+
find . -name "*.go" -type f -not -path "./vendor/*" -not -path "./benchmark_repos/*" | xargs gofmt -s -w
2222

2323
.PHONY: vet
2424
vet:
@@ -35,6 +35,10 @@ lint:
3535
test:
3636
for PKG in $(PACKAGES); do go test -cover -coverprofile $$GOPATH/src/$$PKG/coverage.out $$PKG || exit 1; done;
3737

38+
.PHONY: bench
39+
bench:
40+
go test -run=XXXXXX -benchtime=10s -bench=. || exit 1
41+
3842
.PHONY: build
3943
build:
4044
go build .

tree_entry.go

+128-93
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,7 @@
55
package git
66

77
import (
8-
"fmt"
9-
"path"
108
"path/filepath"
11-
"runtime"
129
"sort"
1310
"strconv"
1411
"strings"
@@ -147,112 +144,150 @@ func (tes Entries) Sort() {
147144
sort.Sort(tes)
148145
}
149146

150-
type commitInfo struct {
151-
entryName string
152-
infos []interface{}
153-
err error
147+
// getCommitInfoState transient state for getting commit info for entries
148+
type getCommitInfoState struct {
149+
entries map[string]*TreeEntry // map from filepath to entry
150+
commits map[string]*Commit // map from entry name to commit
151+
lastCommitHash string
152+
lastCommit *Commit
153+
treePath string
154+
headCommit *Commit
155+
nextSearchSize int // next number of commits to search for
154156
}
155157

156-
// GetCommitsInfo takes advantages of concurrency to speed up getting information
157-
// of all commits that are corresponding to these entries. This method will automatically
158-
// choose the right number of goroutine (concurrency) to use related of the host CPU.
158+
func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitInfoState {
159+
entriesByPath := make(map[string]*TreeEntry, len(entries))
160+
for _, entry := range entries {
161+
entriesByPath[filepath.Join(treePath, entry.Name())] = entry
162+
}
163+
return &getCommitInfoState{
164+
entries: entriesByPath,
165+
commits: make(map[string]*Commit, len(entriesByPath)),
166+
treePath: treePath,
167+
headCommit: headCommit,
168+
nextSearchSize: 16,
169+
}
170+
}
171+
172+
// GetCommitsInfo gets information of all commits that are corresponding to these entries
159173
func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) {
160-
return tes.GetCommitsInfoWithCustomConcurrency(commit, treePath, 0)
174+
state := initGetCommitInfoState(tes, commit, treePath)
175+
if err := getCommitsInfo(state); err != nil {
176+
return nil, err
177+
}
178+
179+
commitsInfo := make([][]interface{}, len(tes))
180+
for i, entry := range tes {
181+
commit = state.commits[filepath.Join(treePath, entry.Name())]
182+
switch entry.Type {
183+
case ObjectCommit:
184+
subModuleURL := ""
185+
if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil {
186+
return nil, err
187+
} else if subModule != nil {
188+
subModuleURL = subModule.URL
189+
}
190+
subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String())
191+
commitsInfo[i] = []interface{}{entry, subModuleFile}
192+
default:
193+
commitsInfo[i] = []interface{}{entry, commit}
194+
}
195+
}
196+
return commitsInfo, nil
161197
}
162198

163-
// GetCommitsInfoWithCustomConcurrency takes advantages of concurrency to speed up getting information
164-
// of all commits that are corresponding to these entries. If the given maxConcurrency is negative or
165-
// equal to zero: the right number of goroutine (concurrency) to use will be chosen related of the
166-
// host CPU.
167-
func (tes Entries) GetCommitsInfoWithCustomConcurrency(commit *Commit, treePath string, maxConcurrency int) ([][]interface{}, error) {
168-
if len(tes) == 0 {
169-
return nil, nil
199+
func (state *getCommitInfoState) nextCommit(hash string) {
200+
state.lastCommitHash = hash
201+
state.lastCommit = nil
202+
}
203+
204+
func (state *getCommitInfoState) commit() (*Commit, error) {
205+
var err error = nil
206+
if state.lastCommit == nil {
207+
state.lastCommit, err = state.headCommit.repo.GetCommit(state.lastCommitHash)
170208
}
209+
return state.lastCommit, err
210+
}
171211

172-
if maxConcurrency <= 0 {
173-
maxConcurrency = runtime.NumCPU()
212+
func (state *getCommitInfoState) update(path string) error {
213+
relPath, err := filepath.Rel(state.treePath, path)
214+
if err != nil {
215+
return nil
216+
}
217+
var entryPath string
218+
if index := strings.IndexRune(relPath, '/'); index >= 0 {
219+
entryPath = filepath.Join(state.treePath, relPath[:index])
220+
} else {
221+
entryPath = path
174222
}
223+
if _, ok := state.entries[entryPath]; !ok {
224+
return nil
225+
} else if _, ok := state.commits[entryPath]; ok {
226+
return nil
227+
} else if commit, err := state.commit(); err != nil {
228+
return err
229+
} else {
230+
state.commits[entryPath] = commit
231+
}
232+
return nil
233+
}
175234

176-
// Length of taskChan determines how many goroutines (subprocesses) can run at the same time.
177-
// The length of revChan should be same as taskChan so goroutines whoever finished job can
178-
// exit as early as possible, only store data inside channel.
179-
taskChan := make(chan bool, maxConcurrency)
180-
revChan := make(chan commitInfo, maxConcurrency)
181-
doneChan := make(chan error)
182-
183-
// Receive loop will exit when it collects same number of data pieces as tree entries.
184-
// It notifies doneChan before exits or notify early with possible error.
185-
infoMap := make(map[string][]interface{}, len(tes))
186-
go func() {
187-
i := 0
188-
for info := range revChan {
189-
if info.err != nil {
190-
doneChan <- info.err
191-
return
192-
}
235+
func getCommitsInfo(state *getCommitInfoState) error {
236+
for len(state.entries) > len(state.commits) {
237+
if err := getNextCommitInfos(state); err != nil {
238+
return err
239+
}
240+
}
241+
return nil
242+
}
193243

194-
infoMap[info.entryName] = info.infos
195-
i++
196-
if i == len(tes) {
244+
func getNextCommitInfos(state *getCommitInfoState) error {
245+
logOutput, err := logCommand(state.lastCommitHash, state).RunInDir(state.headCommit.repo.Path)
246+
if err != nil {
247+
return err
248+
}
249+
lines := strings.Split(logOutput, "\n")
250+
i := 0
251+
for i < len(lines) {
252+
state.nextCommit(lines[i])
253+
i++
254+
for ; i < len(lines); i++ {
255+
path := lines[i]
256+
if path == "" {
197257
break
198258
}
259+
state.update(path)
199260
}
200-
doneChan <- nil
201-
}()
202-
203-
for i := range tes {
204-
// When taskChan is idle (or has empty slots), put operation will not block.
205-
// However when taskChan is full, code will block and wait any running goroutines to finish.
206-
taskChan <- true
207-
208-
if tes[i].Type != ObjectCommit {
209-
go func(i int) {
210-
cinfo := commitInfo{entryName: tes[i].Name()}
211-
c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name()))
212-
if err != nil {
213-
cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err)
214-
} else {
215-
cinfo.infos = []interface{}{tes[i], c}
216-
}
217-
revChan <- cinfo
218-
<-taskChan // Clear one slot from taskChan to allow new goroutines to start.
219-
}(i)
220-
continue
261+
i++ // skip blank line
262+
if len(state.entries) == len(state.commits) {
263+
break
221264
}
222-
223-
// Handle submodule
224-
go func(i int) {
225-
cinfo := commitInfo{entryName: tes[i].Name()}
226-
sm, err := commit.GetSubModule(path.Join(treePath, tes[i].Name()))
227-
if err != nil && !IsErrNotExist(err) {
228-
cinfo.err = fmt.Errorf("GetSubModule (%s/%s): %v", treePath, tes[i].Name(), err)
229-
revChan <- cinfo
230-
return
231-
}
232-
233-
smURL := ""
234-
if sm != nil {
235-
smURL = sm.URL
236-
}
237-
238-
c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name()))
239-
if err != nil {
240-
cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err)
241-
} else {
242-
cinfo.infos = []interface{}{tes[i], NewSubModuleFile(c, smURL, tes[i].ID.String())}
243-
}
244-
revChan <- cinfo
245-
<-taskChan
246-
}(i)
247265
}
266+
return nil
267+
}
248268

249-
if err := <-doneChan; err != nil {
250-
return nil, err
269+
func logCommand(exclusiveStartHash string, state *getCommitInfoState) *Command {
270+
var commitHash string
271+
if len(exclusiveStartHash) == 0 {
272+
commitHash = "HEAD"
273+
} else {
274+
commitHash = exclusiveStartHash + "^"
251275
}
252-
253-
commitsInfo := make([][]interface{}, len(tes))
254-
for i := 0; i < len(tes); i++ {
255-
commitsInfo[i] = infoMap[tes[i].Name()]
276+
var command *Command
277+
numRemainingEntries := len(state.entries) - len(state.commits)
278+
if numRemainingEntries < 32 {
279+
searchSize := (numRemainingEntries + 1) / 2
280+
command = NewCommand("log", prettyLogFormat, "--name-only",
281+
"-"+strconv.Itoa(searchSize), commitHash, "--")
282+
for path, entry := range state.entries {
283+
if _, ok := state.commits[entry.Name()]; !ok {
284+
command.AddArguments(path)
285+
}
286+
}
287+
} else {
288+
command = NewCommand("log", prettyLogFormat, "--name-only",
289+
"-"+strconv.Itoa(state.nextSearchSize), commitHash, "--", state.treePath)
256290
}
257-
return commitsInfo, nil
291+
state.nextSearchSize += state.nextSearchSize
292+
return command
258293
}

tree_entry_test.go

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Copyright 2017 The Gitea Authors. All rights reserved.
2+
// Use of this source code is governed by a MIT-style
3+
// license that can be found in the LICENSE file.
4+
5+
package git
6+
7+
import (
8+
"os"
9+
"path/filepath"
10+
"testing"
11+
"time"
12+
)
13+
14+
const benchmarkReposDir = "benchmark_repos/"
15+
16+
func setupGitRepo(url string, name string) (string, error) {
17+
repoDir := filepath.Join(benchmarkReposDir, name)
18+
if _, err := os.Stat(repoDir); err == nil {
19+
return repoDir, nil
20+
}
21+
return repoDir, Clone(url, repoDir, CloneRepoOptions{
22+
Mirror: false,
23+
Bare: false,
24+
Quiet: true,
25+
Timeout: 5 * time.Minute,
26+
})
27+
}
28+
29+
func BenchmarkEntries_GetCommitsInfo(b *testing.B) {
30+
benchmarks := []struct {
31+
url string
32+
name string
33+
}{
34+
{url: "https://github.com/go-gitea/gitea.git", name: "gitea"},
35+
{url: "https://github.com/ethantkoenig/manyfiles.git", name: "manyfiles"},
36+
{url: "https://github.com/moby/moby.git", name: "moby"},
37+
{url: "https://github.com/golang/go.git", name: "go"},
38+
{url: "https://github.com/torvalds/linux.git", name: "linux"},
39+
}
40+
for _, benchmark := range benchmarks {
41+
var commit *Commit
42+
var entries Entries
43+
if repoPath, err := setupGitRepo(benchmark.url, benchmark.name); err != nil {
44+
panic(err)
45+
} else if repo, err := OpenRepository(repoPath); err != nil {
46+
panic(err)
47+
} else if commit, err = repo.GetBranchCommit("master"); err != nil {
48+
panic(err)
49+
} else if entries, err = commit.Tree.ListEntries(); err != nil {
50+
panic(err)
51+
}
52+
entries.Sort()
53+
b.Run(benchmark.name, func(b *testing.B) {
54+
for i := 0; i < b.N; i++ {
55+
_, err := entries.GetCommitsInfo(commit, "")
56+
if err != nil {
57+
panic(err)
58+
}
59+
}
60+
})
61+
}
62+
}

0 commit comments

Comments
 (0)