Skip to content
This repository was archived by the owner on Apr 12, 2019. It is now read-only.

Revert to old implementation of GetCommitsInfo #73

Merged
merged 1 commit into from
Jun 28, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
225 changes: 90 additions & 135 deletions tree_entry.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ package git
import (
"fmt"
"path"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -145,159 +147,112 @@ func (tes Entries) Sort() {
sort.Sort(tes)
}

// getCommitInfoState transient state for getting commit info for entries
type getCommitInfoState struct {
entries map[string]*TreeEntry // map from filepath to entry
commits map[string]*Commit // map from filepath to commit
lastCommitHash string
lastCommit *Commit
treePath string
headCommit *Commit
nextSearchSize int // next number of commits to search for
type commitInfo struct {
entryName string
infos []interface{}
err error
}

// initGetCommitInfoState builds the transient state used for getting commit
// info for the given entries.
//
// Every entry is indexed by its name joined onto treePath. The tree path kept
// in the state is normalised with path.Clean, and a resulting "." is stored
// as the empty string. The commits map starts out empty and the initial
// search size is 16.
func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitInfoState {
	byPath := make(map[string]*TreeEntry, len(entries))
	for _, te := range entries {
		key := path.Join(treePath, te.Name())
		byPath[key] = te
	}

	// Normalise only after the keys are built; path.Join has already
	// cleaned each key on its own.
	cleaned := path.Clean(treePath)
	if cleaned == "." {
		cleaned = ""
	}

	state := new(getCommitInfoState)
	state.entries = byPath
	state.commits = make(map[string]*Commit, len(byPath))
	state.treePath = cleaned
	state.headCommit = headCommit
	state.nextSearchSize = 16
	return state
}

// GetCommitsInfo gets information of all commits that are corresponding to these entries
// GetCommitsInfo takes advantage of concurrency to speed up getting information
// of all commits that correspond to these entries. This method automatically
// chooses the number of goroutines (concurrency) to use based on the host CPU count.
func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) {
state := initGetCommitInfoState(tes, commit, treePath)
if err := getCommitsInfo(state); err != nil {
return nil, err
}

commitsInfo := make([][]interface{}, len(tes))
for i, entry := range tes {
commit = state.commits[path.Join(treePath, entry.Name())]
switch entry.Type {
case ObjectCommit:
subModuleURL := ""
if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil {
return nil, err
} else if subModule != nil {
subModuleURL = subModule.URL
}
subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String())
commitsInfo[i] = []interface{}{entry, subModuleFile}
default:
commitsInfo[i] = []interface{}{entry, commit}
}
}
return commitsInfo, nil
}

func (state *getCommitInfoState) nextCommit(hash string) {
state.lastCommitHash = hash
state.lastCommit = nil
return tes.GetCommitsInfoWithCustomConcurrency(commit, treePath, 0)
}

func (state *getCommitInfoState) commit() (*Commit, error) {
var err error
if state.lastCommit == nil {
state.lastCommit, err = state.headCommit.repo.GetCommit(state.lastCommitHash)
// GetCommitsInfoWithCustomConcurrency takes advantage of concurrency to speed up getting information
// of all commits that correspond to these entries. If the given maxConcurrency is negative or
// equal to zero, the number of goroutines (concurrency) to use is chosen based on the host CPU
// count.
func (tes Entries) GetCommitsInfoWithCustomConcurrency(commit *Commit, treePath string, maxConcurrency int) ([][]interface{}, error) {
if len(tes) == 0 {
return nil, nil
}
return state.lastCommit, err
}

func (state *getCommitInfoState) update(entryPath string) error {
var entryNameStartIndex int
if len(state.treePath) > 0 {
entryNameStartIndex = len(state.treePath) + 1
if maxConcurrency <= 0 {
maxConcurrency = runtime.NumCPU()
}

if index := strings.IndexByte(entryPath[entryNameStartIndex:], '/'); index >= 0 {
entryPath = entryPath[:entryNameStartIndex+index]
}

if _, ok := state.entries[entryPath]; !ok {
return nil
} else if _, ok := state.commits[entryPath]; ok {
return nil
}

var err error
state.commits[entryPath], err = state.commit()
return err
}

// getCommitsInfo calls getNextCommitInfos repeatedly until state.commits holds
// at least as many items as state.entries. It returns the first error reported
// by getNextCommitInfos, or nil once the loop condition is satisfied.
func getCommitsInfo(state *getCommitInfoState) error {
	for {
		if len(state.commits) >= len(state.entries) {
			return nil
		}
		if err := getNextCommitInfos(state); err != nil {
			return err
		}
	}
}
// The capacity of taskChan determines how many goroutines (subprocesses) can run at the same time.
// revChan should have the same capacity as taskChan so that goroutines that have finished their job
// can exit as early as possible, leaving only their data inside the channel.
taskChan := make(chan bool, maxConcurrency)
revChan := make(chan commitInfo, maxConcurrency)
doneChan := make(chan error)

// The receive loop exits once it has collected as many pieces of data as there are tree entries.
// It notifies doneChan before exiting, or notifies it early with an error if one occurs.
infoMap := make(map[string][]interface{}, len(tes))
go func() {
i := 0
for info := range revChan {
if info.err != nil {
doneChan <- info.err
return
}

func getNextCommitInfos(state *getCommitInfoState) error {
logOutput, err := logCommand(state.lastCommitHash, state).RunInDir(state.headCommit.repo.Path)
if err != nil {
return err
}
lines := strings.Split(logOutput, "\n")
i := 0
for i < len(lines) {
state.nextCommit(lines[i])
i++
for ; i < len(lines); i++ {
entryPath := lines[i]
if entryPath == "" {
infoMap[info.entryName] = info.infos
i++
if i == len(tes) {
break
}
if entryPath[0] == '"' {
entryPath, err = strconv.Unquote(entryPath)
}
doneChan <- nil
}()

for i := range tes {
// When taskChan is idle (or has empty slots), the put operation will not block.
// However, when taskChan is full, the code will block and wait for a running goroutine to finish.
taskChan <- true

if tes[i].Type != ObjectCommit {
go func(i int) {
cinfo := commitInfo{entryName: tes[i].Name()}
c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name()))
if err != nil {
return fmt.Errorf("Unquote: %v", err)
cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err)
} else {
cinfo.infos = []interface{}{tes[i], c}
}
revChan <- cinfo
<-taskChan // Clear one slot from taskChan to allow new goroutines to start.
}(i)
continue
}

// Handle submodule
go func(i int) {
cinfo := commitInfo{entryName: tes[i].Name()}
sm, err := commit.GetSubModule(path.Join(treePath, tes[i].Name()))
if err != nil && !IsErrNotExist(err) {
cinfo.err = fmt.Errorf("GetSubModule (%s/%s): %v", treePath, tes[i].Name(), err)
revChan <- cinfo
return
}
if err = state.update(entryPath); err != nil {
return err

smURL := ""
if sm != nil {
smURL = sm.URL
}
}
i++ // skip blank line
if len(state.entries) == len(state.commits) {
break
}

c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name()))
if err != nil {
cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err)
} else {
cinfo.infos = []interface{}{tes[i], NewSubModuleFile(c, smURL, tes[i].ID.String())}
}
revChan <- cinfo
<-taskChan
}(i)
}
return nil
}

func logCommand(exclusiveStartHash string, state *getCommitInfoState) *Command {
var commitHash string
if len(exclusiveStartHash) == 0 {
commitHash = state.headCommit.ID.String()
} else {
commitHash = exclusiveStartHash + "^"
if err := <-doneChan; err != nil {
return nil, err
}
var command *Command
numRemainingEntries := len(state.entries) - len(state.commits)
if numRemainingEntries < 32 {
searchSize := (numRemainingEntries + 1) / 2
command = NewCommand("log", prettyLogFormat, "--name-only",
"-"+strconv.Itoa(searchSize), commitHash, "--")
for entryPath := range state.entries {
if _, ok := state.commits[entryPath]; !ok {
command.AddArguments(entryPath)
}
}
} else {
command = NewCommand("log", prettyLogFormat, "--name-only",
"-"+strconv.Itoa(state.nextSearchSize), commitHash, "--", state.treePath)

commitsInfo := make([][]interface{}, len(tes))
for i := 0; i < len(tes); i++ {
commitsInfo[i] = infoMap[tes[i].Name()]
}
state.nextSearchSize += state.nextSearchSize
return command
return commitsInfo, nil
}