Skip to content

Commit 1e76a82

Browse files
wolfogrelunny
andauthored
Refactor and enhance issue indexer to support both searching, filtering and paging (#26012)
Fix #24662. Replace #24822 and #25708 (although it has been merged) ## Background In the past, Gitea supported issue searching with a keyword and conditions in a less efficient way. It worked by searching for issues with the keyword and obtaining limited IDs (as it is heavy to get all) on the indexer (bleve/elasticsearch/meilisearch), and then querying with conditions on the database to find a subset of the found IDs. This is why the results could be incomplete. To solve this issue, we need to store all fields that could be used as conditions in the indexer and support both keyword and additional conditions when searching with the indexer. ## Major changes - Redefine `IndexerData` to include all fields that could be used as filter conditions. - Refactor `Search(ctx context.Context, kw string, repoIDs []int64, limit, start int, state string)` to `Search(ctx context.Context, options *SearchOptions)`, so it supports more conditions now. - Change the data type stored in `issueIndexerQueue`. Use `IndexerMetadata` instead of `IndexerData` in case the data has been updated while it is in the queue. This also reduces the storage size of the queue. - Enhance searching with Bleve/Elasticsearch/Meilisearch, make them fully support `SearchOptions`. Also, update the data versions. - Keep most logic of database indexer, but remove `issues.SearchIssueIDsByKeyword` in `models` to avoid confusion where is the entry point to search issues. - Start a Meilisearch instance to test it in unit tests. - Add unit tests with almost full coverage to test Bleve/Elasticsearch/Meilisearch indexer. --------- Co-authored-by: Lunny Xiao <[email protected]>
1 parent aba9096 commit 1e76a82

File tree

37 files changed

+2955
-851
lines changed

37 files changed

+2955
-851
lines changed

.github/workflows/pull-db-tests.yml

+7-1
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,12 @@ jobs:
9898
discovery.type: single-node
9999
ports:
100100
- "9200:9200"
101+
meilisearch:
102+
image: getmeili/meilisearch:v1.2.0
103+
env:
104+
MEILI_ENV: development # disable auth
105+
ports:
106+
- "7700:7700"
101107
smtpimap:
102108
image: tabascoterrier/docker-imap-devel:latest
103109
ports:
@@ -128,7 +134,7 @@ jobs:
128134
go-version: ">=1.20"
129135
check-latest: true
130136
- name: Add hosts to /etc/hosts
131-
run: '[ -e "/.dockerenv" ] || [ -e "/run/.containerenv" ] || echo "127.0.0.1 mysql elasticsearch smtpimap" | sudo tee -a /etc/hosts'
137+
run: '[ -e "/.dockerenv" ] || [ -e "/run/.containerenv" ] || echo "127.0.0.1 mysql elasticsearch meilisearch smtpimap" | sudo tee -a /etc/hosts'
132138
- run: make deps-backend
133139
- run: make backend
134140
env:

models/db/common.go

+17
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,20 @@ func BuildCaseInsensitiveLike(key, value string) builder.Cond {
2020
}
2121
return builder.Like{"UPPER(" + key + ")", strings.ToUpper(value)}
2222
}
23+
24+
// BuildCaseInsensitiveIn returns a condition to check if the given value is in the given values case-insensitively.
25+
// Handles especially SQLite correctly as UPPER there only transforms ASCII letters.
26+
func BuildCaseInsensitiveIn(key string, values []string) builder.Cond {
27+
uppers := make([]string, 0, len(values))
28+
if setting.Database.Type.IsSQLite3() {
29+
for _, value := range values {
30+
uppers = append(uppers, util.ToUpperASCII(value))
31+
}
32+
} else {
33+
for _, value := range values {
34+
uppers = append(uppers, strings.ToUpper(value))
35+
}
36+
}
37+
38+
return builder.In("UPPER("+key+")", uppers)
39+
}

models/issues/issue.go

+24-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
project_model "code.gitea.io/gitea/models/project"
1414
repo_model "code.gitea.io/gitea/models/repo"
1515
user_model "code.gitea.io/gitea/models/user"
16+
"code.gitea.io/gitea/modules/container"
1617
"code.gitea.io/gitea/modules/log"
1718
"code.gitea.io/gitea/modules/setting"
1819
api "code.gitea.io/gitea/modules/structs"
@@ -550,9 +551,30 @@ func GetIssueWithAttrsByID(id int64) (*Issue, error) {
550551
}
551552

552553
// GetIssuesByIDs return issues with the given IDs.
553-
func GetIssuesByIDs(ctx context.Context, issueIDs []int64) (IssueList, error) {
554+
// If keepOrder is true, the order of the returned issues will be the same as the given IDs.
555+
func GetIssuesByIDs(ctx context.Context, issueIDs []int64, keepOrder ...bool) (IssueList, error) {
554556
issues := make([]*Issue, 0, len(issueIDs))
555-
return issues, db.GetEngine(ctx).In("id", issueIDs).Find(&issues)
557+
558+
if err := db.GetEngine(ctx).In("id", issueIDs).Find(&issues); err != nil {
559+
return nil, err
560+
}
561+
562+
if len(keepOrder) > 0 && keepOrder[0] {
563+
m := make(map[int64]*Issue, len(issues))
564+
appended := container.Set[int64]{}
565+
for _, issue := range issues {
566+
m[issue.ID] = issue
567+
}
568+
issues = issues[:0]
569+
for _, id := range issueIDs {
570+
if issue, ok := m[id]; ok && !appended.Contains(id) { // make sure the id is existed and not appended
571+
appended.Add(id)
572+
issues = append(issues, issue)
573+
}
574+
}
575+
}
576+
577+
return issues, nil
556578
}
557579

558580
// GetIssueIDsByRepoID returns all issue ids by repo id

models/issues/issue_search.go

+36-45
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import (
2121

2222
// IssuesOptions represents options of an issue.
2323
type IssuesOptions struct { //nolint
24-
db.ListOptions
24+
db.Paginator
2525
RepoIDs []int64 // overwrites RepoCond if the length is not 0
2626
RepoCond builder.Cond
2727
AssigneeID int64
@@ -99,15 +99,28 @@ func applySorts(sess *xorm.Session, sortType string, priorityRepoID int64) {
9999
}
100100

101101
func applyLimit(sess *xorm.Session, opts *IssuesOptions) *xorm.Session {
102-
if opts.Page >= 0 && opts.PageSize > 0 {
103-
var start int
104-
if opts.Page == 0 {
105-
start = 0
106-
} else {
107-
start = (opts.Page - 1) * opts.PageSize
102+
if opts.Paginator == nil || opts.Paginator.IsListAll() {
103+
return sess
104+
}
105+
106+
// Warning: Do not use GetSkipTake() for *db.ListOptions
107+
// Its implementation could reset the page size with setting.API.MaxResponseItems
108+
if listOptions, ok := opts.Paginator.(*db.ListOptions); ok {
109+
if listOptions.Page >= 0 && listOptions.PageSize > 0 {
110+
var start int
111+
if listOptions.Page == 0 {
112+
start = 0
113+
} else {
114+
start = (listOptions.Page - 1) * listOptions.PageSize
115+
}
116+
sess.Limit(listOptions.PageSize, start)
108117
}
109-
sess.Limit(opts.PageSize, start)
118+
return sess
110119
}
120+
121+
start, limit := opts.Paginator.GetSkipTake()
122+
sess.Limit(limit, start)
123+
111124
return sess
112125
}
113126

@@ -435,7 +448,7 @@ func Issues(ctx context.Context, opts *IssuesOptions) ([]*Issue, error) {
435448
applyConditions(sess, opts)
436449
applySorts(sess, opts.SortType, opts.PriorityRepoID)
437450

438-
issues := make(IssueList, 0, opts.ListOptions.PageSize)
451+
issues := IssueList{}
439452
if err := sess.Find(&issues); err != nil {
440453
return nil, fmt.Errorf("unable to query Issues: %w", err)
441454
}
@@ -447,45 +460,23 @@ func Issues(ctx context.Context, opts *IssuesOptions) ([]*Issue, error) {
447460
return issues, nil
448461
}
449462

450-
// SearchIssueIDsByKeyword search issues on database
451-
func SearchIssueIDsByKeyword(ctx context.Context, kw string, repoIDs []int64, limit, start int) (int64, []int64, error) {
452-
repoCond := builder.In("repo_id", repoIDs)
453-
subQuery := builder.Select("id").From("issue").Where(repoCond)
454-
cond := builder.And(
455-
repoCond,
456-
builder.Or(
457-
db.BuildCaseInsensitiveLike("name", kw),
458-
db.BuildCaseInsensitiveLike("content", kw),
459-
builder.In("id", builder.Select("issue_id").
460-
From("comment").
461-
Where(builder.And(
462-
builder.Eq{"type": CommentTypeComment},
463-
builder.In("issue_id", subQuery),
464-
db.BuildCaseInsensitiveLike("content", kw),
465-
)),
466-
),
467-
),
468-
)
469-
470-
ids := make([]int64, 0, limit)
471-
res := make([]struct {
472-
ID int64
473-
UpdatedUnix int64
474-
}, 0, limit)
475-
err := db.GetEngine(ctx).Distinct("id", "updated_unix").Table("issue").Where(cond).
476-
OrderBy("`updated_unix` DESC").Limit(limit, start).
477-
Find(&res)
478-
if err != nil {
479-
return 0, nil, err
480-
}
481-
for _, r := range res {
482-
ids = append(ids, r.ID)
463+
// IssueIDs returns a list of issue ids by given conditions.
464+
func IssueIDs(ctx context.Context, opts *IssuesOptions, otherConds ...builder.Cond) ([]int64, int64, error) {
465+
sess := db.GetEngine(ctx).
466+
Join("INNER", "repository", "`issue`.repo_id = `repository`.id")
467+
applyConditions(sess, opts)
468+
for _, cond := range otherConds {
469+
sess.And(cond)
483470
}
484471

485-
total, err := db.GetEngine(ctx).Distinct("id").Table("issue").Where(cond).Count()
472+
applyLimit(sess, opts)
473+
applySorts(sess, opts.SortType, opts.PriorityRepoID)
474+
475+
var res []int64
476+
total, err := sess.Select("`issue`.id").Table(&Issue{}).FindAndCount(&res)
486477
if err != nil {
487-
return 0, nil, err
478+
return nil, 0, err
488479
}
489480

490-
return total, ids, nil
481+
return res, total, nil
491482
}

models/issues/issue_test.go

+18-29
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ func TestIssueAPIURL(t *testing.T) {
7373
func TestGetIssuesByIDs(t *testing.T) {
7474
assert.NoError(t, unittest.PrepareTestDatabase())
7575
testSuccess := func(expectedIssueIDs, nonExistentIssueIDs []int64) {
76-
issues, err := issues_model.GetIssuesByIDs(db.DefaultContext, append(expectedIssueIDs, nonExistentIssueIDs...))
76+
issues, err := issues_model.GetIssuesByIDs(db.DefaultContext, append(expectedIssueIDs, nonExistentIssueIDs...), true)
7777
assert.NoError(t, err)
7878
actualIssueIDs := make([]int64, len(issues))
7979
for i, issue := range issues {
@@ -83,6 +83,7 @@ func TestGetIssuesByIDs(t *testing.T) {
8383
}
8484
testSuccess([]int64{1, 2, 3}, []int64{})
8585
testSuccess([]int64{1, 2, 3}, []int64{unittest.NonexistentID})
86+
testSuccess([]int64{3, 2, 1}, []int64{})
8687
}
8788

8889
func TestGetParticipantIDsByIssue(t *testing.T) {
@@ -165,7 +166,7 @@ func TestIssues(t *testing.T) {
165166
issues_model.IssuesOptions{
166167
RepoCond: builder.In("repo_id", 1, 3),
167168
SortType: "oldest",
168-
ListOptions: db.ListOptions{
169+
Paginator: &db.ListOptions{
169170
Page: 1,
170171
PageSize: 4,
171172
},
@@ -175,7 +176,7 @@ func TestIssues(t *testing.T) {
175176
{
176177
issues_model.IssuesOptions{
177178
LabelIDs: []int64{1},
178-
ListOptions: db.ListOptions{
179+
Paginator: &db.ListOptions{
179180
Page: 1,
180181
PageSize: 4,
181182
},
@@ -185,7 +186,7 @@ func TestIssues(t *testing.T) {
185186
{
186187
issues_model.IssuesOptions{
187188
LabelIDs: []int64{1, 2},
188-
ListOptions: db.ListOptions{
189+
Paginator: &db.ListOptions{
189190
Page: 1,
190191
PageSize: 4,
191192
},
@@ -333,30 +334,6 @@ func TestIssue_loadTotalTimes(t *testing.T) {
333334
assert.Equal(t, int64(3682), ms.TotalTrackedTime)
334335
}
335336

336-
func TestIssue_SearchIssueIDsByKeyword(t *testing.T) {
337-
assert.NoError(t, unittest.PrepareTestDatabase())
338-
total, ids, err := issues_model.SearchIssueIDsByKeyword(context.TODO(), "issue2", []int64{1}, 10, 0)
339-
assert.NoError(t, err)
340-
assert.EqualValues(t, 1, total)
341-
assert.EqualValues(t, []int64{2}, ids)
342-
343-
total, ids, err = issues_model.SearchIssueIDsByKeyword(context.TODO(), "first", []int64{1}, 10, 0)
344-
assert.NoError(t, err)
345-
assert.EqualValues(t, 1, total)
346-
assert.EqualValues(t, []int64{1}, ids)
347-
348-
total, ids, err = issues_model.SearchIssueIDsByKeyword(context.TODO(), "for", []int64{1}, 10, 0)
349-
assert.NoError(t, err)
350-
assert.EqualValues(t, 5, total)
351-
assert.ElementsMatch(t, []int64{1, 2, 3, 5, 11}, ids)
352-
353-
// issue1's comment id 2
354-
total, ids, err = issues_model.SearchIssueIDsByKeyword(context.TODO(), "good", []int64{1}, 10, 0)
355-
assert.NoError(t, err)
356-
assert.EqualValues(t, 1, total)
357-
assert.EqualValues(t, []int64{1}, ids)
358-
}
359-
360337
func TestGetRepoIDsForIssuesOptions(t *testing.T) {
361338
assert.NoError(t, unittest.PrepareTestDatabase())
362339
user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
@@ -496,7 +473,19 @@ func TestCorrectIssueStats(t *testing.T) {
496473
wg.Wait()
497474

498475
// Now we will get all issueID's that match the "Bugs are nasty" query.
499-
total, ids, err := issues_model.SearchIssueIDsByKeyword(context.TODO(), "Bugs are nasty", []int64{1}, issueAmount, 0)
476+
issues, err := issues_model.Issues(context.TODO(), &issues_model.IssuesOptions{
477+
Paginator: &db.ListOptions{
478+
PageSize: issueAmount,
479+
},
480+
RepoIDs: []int64{1},
481+
})
482+
total := int64(len(issues))
483+
var ids []int64
484+
for _, issue := range issues {
485+
if issue.Content == "Bugs are nasty" {
486+
ids = append(ids, issue.ID)
487+
}
488+
}
500489

501490
// Just to be sure.
502491
assert.NoError(t, err)

models/issues/issue_user.go

+10
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,13 @@ func UpdateIssueUsersByMentions(ctx context.Context, issueID int64, uids []int64
8484
}
8585
return nil
8686
}
87+
88+
// GetIssueMentionIDs returns all mentioned user IDs of an issue.
89+
func GetIssueMentionIDs(ctx context.Context, issueID int64) ([]int64, error) {
90+
var ids []int64
91+
return ids, db.GetEngine(ctx).Table(IssueUser{}).
92+
Where("issue_id=?", issueID).
93+
And("is_mentioned=?", true).
94+
Select("uid").
95+
Find(&ids)
96+
}

models/issues/label.go

+14-2
Original file line numberDiff line numberDiff line change
@@ -272,12 +272,12 @@ func GetLabelByID(ctx context.Context, labelID int64) (*Label, error) {
272272
}
273273

274274
// GetLabelsByIDs returns a list of labels by IDs
275-
func GetLabelsByIDs(labelIDs []int64) ([]*Label, error) {
275+
func GetLabelsByIDs(labelIDs []int64, cols ...string) ([]*Label, error) {
276276
labels := make([]*Label, 0, len(labelIDs))
277277
return labels, db.GetEngine(db.DefaultContext).Table("label").
278278
In("id", labelIDs).
279279
Asc("name").
280-
Cols("id", "repo_id", "org_id").
280+
Cols(cols...).
281281
Find(&labels)
282282
}
283283

@@ -476,6 +476,18 @@ func GetLabelsByOrgID(ctx context.Context, orgID int64, sortType string, listOpt
476476
return labels, sess.Find(&labels)
477477
}
478478

479+
// GetLabelIDsByNames returns a list of labelIDs by names.
480+
// It doesn't filter them by repo or org, so it could return labels belonging to different repos/orgs.
481+
// It's used for filtering issues via indexer, otherwise it would be useless.
482+
// Since it could return labels with the same name, so the length of returned ids could be more than the length of names.
483+
func GetLabelIDsByNames(ctx context.Context, labelNames []string) ([]int64, error) {
484+
labelIDs := make([]int64, 0, len(labelNames))
485+
return labelIDs, db.GetEngine(ctx).Table("label").
486+
In("name", labelNames).
487+
Cols("id").
488+
Find(&labelIDs)
489+
}
490+
479491
// CountLabelsByOrgID count all labels that belong to given organization by ID.
480492
func CountLabelsByOrgID(orgID int64) (int64, error) {
481493
return db.GetEngine(db.DefaultContext).Where("org_id = ?", orgID).Count(&Label{})

models/issues/milestone.go

+12
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,18 @@ func GetMilestones(opts GetMilestonesOption) (MilestoneList, int64, error) {
396396
return miles, total, err
397397
}
398398

399+
// GetMilestoneIDsByNames returns a list of milestone ids by given names.
400+
// It doesn't filter them by repo, so it could return milestones belonging to different repos.
401+
// It's used for filtering issues via indexer, otherwise it would be useless.
402+
// Since it could return milestones with the same name, so the length of returned ids could be more than the length of names.
403+
func GetMilestoneIDsByNames(ctx context.Context, names []string) ([]int64, error) {
404+
var ids []int64
405+
return ids, db.GetEngine(ctx).Table("milestone").
406+
Where(db.BuildCaseInsensitiveIn("name", names)).
407+
Cols("id").
408+
Find(&ids)
409+
}
410+
399411
// SearchMilestones search milestones
400412
func SearchMilestones(repoCond builder.Cond, page int, isClosed bool, sortType, keyword string) (MilestoneList, error) {
401413
miles := make([]*Milestone, 0, setting.UI.IssuePagingNum)

modules/indexer/code/bleve/bleve.go

+2-11
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,6 @@ const (
4141
maxBatchSize = 16
4242
)
4343

44-
// numericEqualityQuery a numeric equality query for the given value and field
45-
func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
46-
f := float64(value)
47-
tru := true
48-
q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
49-
q.SetField(field)
50-
return q
51-
}
52-
5344
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
5445
return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
5546
"type": unicodenorm.Name,
@@ -225,7 +216,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st
225216

226217
// Delete deletes indexes by ids
227218
func (b *Indexer) Delete(_ context.Context, repoID int64) error {
228-
query := numericEqualityQuery(repoID, "RepoID")
219+
query := inner_bleve.NumericEqualityQuery(repoID, "RepoID")
229220
searchRequest := bleve.NewSearchRequestOptions(query, 2147483647, 0, false)
230221
result, err := b.inner.Indexer.Search(searchRequest)
231222
if err != nil {
@@ -262,7 +253,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
262253
if len(repoIDs) > 0 {
263254
repoQueries := make([]query.Query, 0, len(repoIDs))
264255
for _, repoID := range repoIDs {
265-
repoQueries = append(repoQueries, numericEqualityQuery(repoID, "RepoID"))
256+
repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID"))
266257
}
267258

268259
indexerQuery = bleve.NewConjunctionQuery(

0 commit comments

Comments
 (0)