Skip to content

Commit a89592d

Browse files
ethantkoenig authored
and lafriks committed
Reduce repo indexer disk usage (#3452)
1 parent 283e87d commit a89592d

File tree

14 files changed

+704
-97
lines changed

14 files changed

+704
-97
lines changed

models/issue_indexer.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ func populateIssueIndexer() error {
5353
return err
5454
}
5555
for _, issue := range issues {
56-
if err := batch.Add(issue.update()); err != nil {
56+
if err := issue.update().AddToFlushingBatch(batch); err != nil {
5757
return err
5858
}
5959
}
@@ -78,7 +78,7 @@ func processIssueIndexerUpdateQueue() {
7878
issue, err := GetIssueByID(issueID)
7979
if err != nil {
8080
log.Error(4, "GetIssueByID: %v", err)
81-
} else if err = batch.Add(issue.update()); err != nil {
81+
} else if err = issue.update().AddToFlushingBatch(batch); err != nil {
8282
log.Error(4, "IssueIndexer: %v", err)
8383
}
8484
}

models/repo_indexer.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"code.gitea.io/gitea/modules/indexer"
1515
"code.gitea.io/gitea/modules/log"
1616
"code.gitea.io/gitea/modules/setting"
17+
18+
"github.com/ethantkoenig/rupture"
1719
)
1820

1921
// RepoIndexerStatus status of a repo's entry in the repo indexer
@@ -187,7 +189,7 @@ func getRepoChanges(repo *Repository, revision string) (*repoChanges, error) {
187189
return nonGenesisChanges(repo, revision)
188190
}
189191

190-
func addUpdate(update fileUpdate, repo *Repository, batch *indexer.Batch) error {
192+
func addUpdate(update fileUpdate, repo *Repository, batch rupture.FlushingBatch) error {
191193
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
192194
RunInDir(repo.RepoPath())
193195
if err != nil {
@@ -206,24 +208,26 @@ func addUpdate(update fileUpdate, repo *Repository, batch *indexer.Batch) error
206208
} else if !base.IsTextFile(fileContents) {
207209
return nil
208210
}
209-
return batch.Add(indexer.RepoIndexerUpdate{
211+
indexerUpdate := indexer.RepoIndexerUpdate{
210212
Filepath: update.Filename,
211213
Op: indexer.RepoIndexerOpUpdate,
212214
Data: &indexer.RepoIndexerData{
213215
RepoID: repo.ID,
214216
Content: string(fileContents),
215217
},
216-
})
218+
}
219+
return indexerUpdate.AddToFlushingBatch(batch)
217220
}
218221

219-
func addDelete(filename string, repo *Repository, batch *indexer.Batch) error {
220-
return batch.Add(indexer.RepoIndexerUpdate{
222+
func addDelete(filename string, repo *Repository, batch rupture.FlushingBatch) error {
223+
indexerUpdate := indexer.RepoIndexerUpdate{
221224
Filepath: filename,
222225
Op: indexer.RepoIndexerOpDelete,
223226
Data: &indexer.RepoIndexerData{
224227
RepoID: repo.ID,
225228
},
226-
})
229+
}
230+
return indexerUpdate.AddToFlushingBatch(batch)
227231
}
228232

229233
// parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command

modules/indexer/indexer.go

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,17 @@ package indexer
66

77
import (
88
"fmt"
9+
"os"
910
"strconv"
1011

12+
"code.gitea.io/gitea/modules/setting"
13+
1114
"github.com/blevesearch/bleve"
1215
"github.com/blevesearch/bleve/analysis/token/unicodenorm"
16+
"github.com/blevesearch/bleve/index/upsidedown"
1317
"github.com/blevesearch/bleve/mapping"
1418
"github.com/blevesearch/bleve/search/query"
19+
"github.com/ethantkoenig/rupture"
1520
)
1621

1722
// indexerID a bleve-compatible unique identifier for an integer id
@@ -53,40 +58,36 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
5358
})
5459
}
5560

56-
// Update represents an update to an indexer
57-
type Update interface {
58-
addToBatch(batch *bleve.Batch) error
59-
}
60-
6161
const maxBatchSize = 16
6262

63-
// Batch batch of indexer updates that automatically flushes once it
64-
// reaches a certain size
65-
type Batch struct {
66-
batch *bleve.Batch
67-
index bleve.Index
68-
}
69-
70-
// Add add update to batch, possibly flushing
71-
func (batch *Batch) Add(update Update) error {
72-
if err := update.addToBatch(batch.batch); err != nil {
73-
return err
63+
// openIndexer opens the index at the specified path, checking for metadata
64+
// updates and bleve version updates. If index needs to be created (or
65+
// re-created), returns (nil, nil)
66+
func openIndexer(path string, latestVersion int) (bleve.Index, error) {
67+
_, err := os.Stat(path) // stat the index actually being opened, not always the issue index path
68+
if err != nil && os.IsNotExist(err) {
69+
return nil, nil
70+
} else if err != nil {
71+
return nil, err
7472
}
75-
return batch.flushIfFull()
76-
}
7773

78-
func (batch *Batch) flushIfFull() error {
79-
if batch.batch.Size() >= maxBatchSize {
80-
return batch.Flush()
74+
metadata, err := rupture.ReadIndexMetadata(path)
75+
if err != nil {
76+
return nil, err
77+
}
78+
if metadata.Version < latestVersion {
79+
// the indexer is using a previous version, so we should delete it and
80+
// re-populate
81+
return nil, os.RemoveAll(path)
8182
}
82-
return nil
83-
}
8483

85-
// Flush manually flush the batch, regardless of its size
86-
func (batch *Batch) Flush() error {
87-
if err := batch.index.Batch(batch.batch); err != nil {
88-
return err
84+
index, err := bleve.Open(path)
85+
if err != nil && err == upsidedown.IncompatibleVersion {
86+
// the indexer was built with a previous version of bleve, so we should
87+
// delete it and re-populate
88+
return nil, os.RemoveAll(path)
89+
} else if err != nil {
90+
return nil, err
8991
}
90-
batch.batch.Reset()
91-
return nil
92+
return index, nil
9293
}

modules/indexer/issue.go

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,26 @@
55
package indexer
66

77
import (
8-
"os"
9-
108
"code.gitea.io/gitea/modules/log"
119
"code.gitea.io/gitea/modules/setting"
1210

1311
"github.com/blevesearch/bleve"
1412
"github.com/blevesearch/bleve/analysis/analyzer/custom"
1513
"github.com/blevesearch/bleve/analysis/token/lowercase"
1614
"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
17-
"github.com/blevesearch/bleve/index/upsidedown"
15+
"github.com/ethantkoenig/rupture"
1816
)
1917

2018
// issueIndexer (thread-safe) index for searching issues
2119
var issueIndexer bleve.Index
2220

21+
const (
22+
issueIndexerAnalyzer = "issueIndexer"
23+
issueIndexerDocType = "issueIndexerDocType"
24+
25+
issueIndexerLatestVersion = 1
26+
)
27+
2328
// IssueIndexerData data stored in the issue indexer
2429
type IssueIndexerData struct {
2530
RepoID int64
@@ -28,35 +33,33 @@ type IssueIndexerData struct {
2833
Comments []string
2934
}
3035

36+
// Type returns the document type, for bleve's mapping.Classifier interface.
37+
func (i *IssueIndexerData) Type() string {
38+
return issueIndexerDocType
39+
}
40+
3141
// IssueIndexerUpdate an update to the issue indexer
3242
type IssueIndexerUpdate struct {
3343
IssueID int64
3444
Data *IssueIndexerData
3545
}
3646

37-
func (update IssueIndexerUpdate) addToBatch(batch *bleve.Batch) error {
38-
return batch.Index(indexerID(update.IssueID), update.Data)
47+
// AddToFlushingBatch adds the update to the given flushing batch.
48+
func (i IssueIndexerUpdate) AddToFlushingBatch(batch rupture.FlushingBatch) error {
49+
return batch.Index(indexerID(i.IssueID), i.Data)
3950
}
4051

41-
const issueIndexerAnalyzer = "issueIndexer"
42-
4352
// InitIssueIndexer initialize issue indexer
4453
func InitIssueIndexer(populateIndexer func() error) {
45-
_, err := os.Stat(setting.Indexer.IssuePath)
46-
if err != nil && !os.IsNotExist(err) {
54+
var err error
55+
issueIndexer, err = openIndexer(setting.Indexer.IssuePath, issueIndexerLatestVersion)
56+
if err != nil {
4757
log.Fatal(4, "InitIssueIndexer: %v", err)
48-
} else if err == nil {
49-
issueIndexer, err = bleve.Open(setting.Indexer.IssuePath)
50-
if err == nil {
51-
return
52-
} else if err != upsidedown.IncompatibleVersion {
53-
log.Fatal(4, "InitIssueIndexer, open index: %v", err)
54-
}
55-
log.Warn("Incompatible bleve version, deleting and recreating issue indexer")
56-
if err = os.RemoveAll(setting.Indexer.IssuePath); err != nil {
57-
log.Fatal(4, "InitIssueIndexer: remove index, %v", err)
58-
}
5958
}
59+
if issueIndexer != nil {
60+
return
61+
}
62+
6063
if err = createIssueIndexer(); err != nil {
6164
log.Fatal(4, "InitIssuesIndexer: create index, %v", err)
6265
}
@@ -70,9 +73,13 @@ func createIssueIndexer() error {
7073
mapping := bleve.NewIndexMapping()
7174
docMapping := bleve.NewDocumentMapping()
7275

73-
docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())
76+
numericFieldMapping := bleve.NewNumericFieldMapping()
77+
numericFieldMapping.IncludeInAll = false
78+
docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping)
7479

7580
textFieldMapping := bleve.NewTextFieldMapping()
81+
textFieldMapping.Store = false
82+
textFieldMapping.IncludeInAll = false
7683
docMapping.AddFieldMappingsAt("Title", textFieldMapping)
7784
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
7885
docMapping.AddFieldMappingsAt("Comments", textFieldMapping)
@@ -89,19 +96,17 @@ func createIssueIndexer() error {
8996
}
9097

9198
mapping.DefaultAnalyzer = issueIndexerAnalyzer
92-
mapping.AddDocumentMapping("issues", docMapping)
99+
mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
100+
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
93101

94102
var err error
95103
issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping)
96104
return err
97105
}
98106

99107
// IssueIndexerBatch batch to add updates to
100-
func IssueIndexerBatch() *Batch {
101-
return &Batch{
102-
batch: issueIndexer.NewBatch(),
103-
index: issueIndexer,
104-
}
108+
func IssueIndexerBatch() rupture.FlushingBatch {
109+
return rupture.NewFlushingBatch(issueIndexer, maxBatchSize)
105110
}
106111

107112
// SearchIssuesByKeyword searches for issues by given conditions.

0 commit comments

Comments
 (0)