Skip to content

Commit f75a9b2

Browse files
authored
Speed up enry.IsVendor (#15213) (#15245)
Backport #15213. `enry.IsVendor` is rather slow because it simply iterates across all regexps. This PR adjusts the regexps and combines them to make this process a little quicker. Related #15143. Signed-off-by: Andrew Thornton <[email protected]>
1 parent 2705696 commit f75a9b2

File tree

6 files changed

+116
-4
lines changed

6 files changed

+116
-4
lines changed

modules/analyze/vendor.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// Copyright 2021 The Gitea Authors. All rights reserved.
2+
// Use of this source code is governed by a MIT-style
3+
// license that can be found in the LICENSE file.
4+
5+
package analyze
6+
7+
import (
8+
"regexp"
9+
"sort"
10+
"strings"
11+
12+
"github.com/go-enry/go-enry/v2/data"
13+
)
14+
15+
var isVendorRegExp *regexp.Regexp
16+
17+
func init() {
18+
matchers := data.VendorMatchers
19+
20+
caretStrings := make([]string, 0, 10)
21+
caretShareStrings := make([]string, 0, 10)
22+
23+
matcherStrings := make([]string, 0, len(matchers))
24+
for _, matcher := range matchers {
25+
str := matcher.String()
26+
if str[0] == '^' {
27+
caretStrings = append(caretStrings, str[1:])
28+
} else if str[0:5] == "(^|/)" {
29+
caretShareStrings = append(caretShareStrings, str[5:])
30+
} else {
31+
matcherStrings = append(matcherStrings, str)
32+
}
33+
}
34+
35+
sort.Strings(caretShareStrings)
36+
sort.Strings(caretStrings)
37+
sort.Strings(matcherStrings)
38+
39+
sb := &strings.Builder{}
40+
sb.WriteString("(?:^(?:")
41+
sb.WriteString(caretStrings[0])
42+
for _, matcher := range caretStrings[1:] {
43+
sb.WriteString(")|(?:")
44+
sb.WriteString(matcher)
45+
}
46+
sb.WriteString("))")
47+
sb.WriteString("|")
48+
sb.WriteString("(?:(?:^|/)(?:")
49+
sb.WriteString(caretShareStrings[0])
50+
for _, matcher := range caretShareStrings[1:] {
51+
sb.WriteString(")|(?:")
52+
sb.WriteString(matcher)
53+
}
54+
sb.WriteString("))")
55+
sb.WriteString("|")
56+
sb.WriteString("(?:")
57+
sb.WriteString(matcherStrings[0])
58+
for _, matcher := range matcherStrings[1:] {
59+
sb.WriteString(")|(?:")
60+
sb.WriteString(matcher)
61+
}
62+
sb.WriteString(")")
63+
combined := sb.String()
64+
isVendorRegExp = regexp.MustCompile(combined)
65+
}
66+
67+
// IsVendor returns whether or not path is a vendor path.
68+
func IsVendor(path string) bool {
69+
return isVendorRegExp.MatchString(path)
70+
}

modules/analyze/vendor_test.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Copyright 2021 The Gitea Authors. All rights reserved.
2+
// Use of this source code is governed by a MIT-style
3+
// license that can be found in the LICENSE file.
4+
5+
package analyze
6+
7+
import "testing"
8+
9+
func TestIsVendor(t *testing.T) {
10+
tests := []struct {
11+
path string
12+
want bool
13+
}{
14+
{"cache/", true},
15+
{"random/cache/", true},
16+
{"cache", false},
17+
{"dependencies/", true},
18+
{"Dependencies/", true},
19+
{"dependency/", false},
20+
{"dist/", true},
21+
{"dist", false},
22+
{"random/dist/", true},
23+
{"random/dist", false},
24+
{"deps/", true},
25+
{"configure", true},
26+
{"a/configure", true},
27+
{"config.guess", true},
28+
{"config.guess/", false},
29+
{".vscode/", true},
30+
{"doc/_build/", true},
31+
{"a/docs/_build/", true},
32+
{"a/dasdocs/_build-vsdoc.js", true},
33+
{"a/dasdocs/_build-vsdoc.j", false},
34+
}
35+
for _, tt := range tests {
36+
t.Run(tt.path, func(t *testing.T) {
37+
if got := IsVendor(tt.path); got != tt.want {
38+
t.Errorf("IsVendor() = %v, want %v", got, tt.want)
39+
}
40+
})
41+
}
42+
}

modules/git/repo_language_stats_gogit.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
4343

4444
sizes := make(map[string]int64)
4545
err = tree.Files().ForEach(func(f *object.File) error {
46-
if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
46+
if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
4747
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
4848
return nil
4949
}

modules/git/repo_language_stats_nogogit.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
6767
for _, f := range entries {
6868
contentBuf.Reset()
6969
content = contentBuf.Bytes()
70-
if f.Size() == 0 || enry.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
70+
if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
7171
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
7272
continue
7373
}

modules/indexer/code/bleve.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) {
178178

179179
func (b *BleveIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
180180
// Ignore vendored files in code search
181-
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
181+
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
182182
return nil
183183
}
184184

modules/indexer/code/elastic_search.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ func (b *ElasticSearchIndexer) init() (bool, error) {
177177

178178
func (b *ElasticSearchIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) {
179179
// Ignore vendored files in code search
180-
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
180+
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
181181
return nil, nil
182182
}
183183

0 commit comments

Comments
 (0)