Skip to content

Commit 4734d43

Browse files
Support repo code search without setting up an indexer (#29998)
By relying on git's own `git grep`, end users (especially those running small instances) can benefit from the code search feature without enabling the indexer. Fix #29996 ![image](https://github.com/go-gitea/gitea/assets/2114189/11b7e458-88a4-480d-b4d7-72ee59406dd1) ![image](https://github.com/go-gitea/gitea/assets/2114189/0fe777d5-c95c-4288-a818-0427680805b6) --------- Co-authored-by: silverwind <[email protected]>
1 parent 90a4f9a commit 4734d43

File tree

12 files changed

+253
-65
lines changed

12 files changed

+253
-65
lines changed

docs/content/administration/repo-indexer.en-us.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ menu:
1717

1818
# Repository indexer
1919

20+
## Builtin repository code search without indexer
21+
22+
Users can perform repository-level code search without setting up a repository indexer.
23+
The builtin code search is based on the `git grep` command, which is fast and efficient for small repositories.
24+
Better code search support can be achieved by setting up the repository indexer.
25+
2026
## Setting up the repository indexer
2127

2228
Gitea can search through the files of the repositories by enabling this function in your [`app.ini`](administration/config-cheat-sheet.md):

docs/content/installation/comparison.en-us.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ _Symbols used in table:_
8787
| Git Blame |||||||||
8888
| Visual comparison of image changes |||| ? | ? | ? |||
8989

90+
- Gitea has builtin repository-level code search
91+
- Better code search support can be achieved by [using a repository indexer](administration/repo-indexer.md)
92+
9093
## Issue Tracker
9194

9295
| Feature | Gitea | Gogs | GitHub EE | GitLab CE | GitLab EE | BitBucket | RhodeCode CE | RhodeCode EE |

modules/git/command.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,6 @@ type RunStdError interface {
367367
error
368368
Unwrap() error
369369
Stderr() string
370-
IsExitCode(code int) bool
371370
}
372371

373372
type runStdError struct {
@@ -392,9 +391,9 @@ func (r *runStdError) Stderr() string {
392391
return r.stderr
393392
}
394393

395-
func (r *runStdError) IsExitCode(code int) bool {
394+
func IsErrorExitCode(err error, code int) bool {
396395
var exitError *exec.ExitError
397-
if errors.As(r.err, &exitError) {
396+
if errors.As(err, &exitError) {
398397
return exitError.ExitCode() == code
399398
}
400399
return false

modules/git/git.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ func checkGitVersionCompatibility(gitVer *version.Version) error {
341341

342342
func configSet(key, value string) error {
343343
stdout, _, err := NewCommand(DefaultContext, "config", "--global", "--get").AddDynamicArguments(key).RunStdString(nil)
344-
if err != nil && !err.IsExitCode(1) {
344+
if err != nil && !IsErrorExitCode(err, 1) {
345345
return fmt.Errorf("failed to get git config %s, err: %w", key, err)
346346
}
347347

@@ -364,7 +364,7 @@ func configSetNonExist(key, value string) error {
364364
// already exist
365365
return nil
366366
}
367-
if err.IsExitCode(1) {
367+
if IsErrorExitCode(err, 1) {
368368
// not exist, set new config
369369
_, _, err = NewCommand(DefaultContext, "config", "--global").AddDynamicArguments(key, value).RunStdString(nil)
370370
if err != nil {
@@ -382,7 +382,7 @@ func configAddNonExist(key, value string) error {
382382
// already exist
383383
return nil
384384
}
385-
if err.IsExitCode(1) {
385+
if IsErrorExitCode(err, 1) {
386386
// not exist, add new config
387387
_, _, err = NewCommand(DefaultContext, "config", "--global", "--add").AddDynamicArguments(key, value).RunStdString(nil)
388388
if err != nil {
@@ -403,7 +403,7 @@ func configUnsetAll(key, value string) error {
403403
}
404404
return nil
405405
}
406-
if err.IsExitCode(1) {
406+
if IsErrorExitCode(err, 1) {
407407
// not exist
408408
return nil
409409
}

modules/git/grep.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// Copyright 2024 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package git
5+
6+
import (
	"bufio"
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"strconv"
	"strings"

	"code.gitea.io/gitea/modules/util"
)
18+
19+
// GrepResult holds the lines reported by "git grep" for a single file.
type GrepResult struct {
	// Filename is the path of the file as printed in the grep heading,
	// with the leading "<ref>:" prefix removed.
	Filename string
	// LineNumbers and LineCodes are parallel slices: LineCodes[i] is the text
	// of the line whose number is LineNumbers[i]. They contain both matching
	// lines and any requested context lines, in output order.
	LineNumbers []int
	LineCodes   []string
}
24+
25+
type GrepOptions struct {
26+
RefName string
27+
ContextLineNumber int
28+
IsFuzzy bool
29+
}
30+
31+
func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) {
32+
stdoutReader, stdoutWriter, err := os.Pipe()
33+
if err != nil {
34+
return nil, fmt.Errorf("unable to create os pipe to grep: %w", err)
35+
}
36+
defer func() {
37+
_ = stdoutReader.Close()
38+
_ = stdoutWriter.Close()
39+
}()
40+
41+
/*
42+
The output is like this ( "^@" means \x00):
43+
44+
HEAD:.air.toml
45+
6^@bin = "gitea"
46+
47+
HEAD:.changelog.yml
48+
2^@repo: go-gitea/gitea
49+
*/
50+
var results []*GrepResult
51+
cmd := NewCommand(ctx, "grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name")
52+
cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
53+
if opts.IsFuzzy {
54+
words := strings.Fields(search)
55+
for _, word := range words {
56+
cmd.AddOptionValues("-e", strings.TrimLeft(word, "-"))
57+
}
58+
} else {
59+
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
60+
}
61+
cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD"))
62+
stderr := bytes.Buffer{}
63+
err = cmd.Run(&RunOpts{
64+
Dir: repo.Path,
65+
Stdout: stdoutWriter,
66+
Stderr: &stderr,
67+
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
68+
_ = stdoutWriter.Close()
69+
defer stdoutReader.Close()
70+
71+
isInBlock := false
72+
scanner := bufio.NewScanner(stdoutReader)
73+
var res *GrepResult
74+
for scanner.Scan() {
75+
line := scanner.Text()
76+
if !isInBlock {
77+
if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
78+
isInBlock = true
79+
res = &GrepResult{Filename: filename}
80+
results = append(results, res)
81+
}
82+
continue
83+
}
84+
if line == "" {
85+
if len(results) >= 50 {
86+
cancel()
87+
break
88+
}
89+
isInBlock = false
90+
continue
91+
}
92+
if line == "--" {
93+
continue
94+
}
95+
if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok {
96+
lineNumInt, _ := strconv.Atoi(lineNum)
97+
res.LineNumbers = append(res.LineNumbers, lineNumInt)
98+
res.LineCodes = append(res.LineCodes, lineCode)
99+
}
100+
}
101+
return scanner.Err()
102+
},
103+
})
104+
// git grep exits with 1 if no results are found
105+
if IsErrorExitCode(err, 1) && stderr.Len() == 0 {
106+
return nil, nil
107+
}
108+
if err != nil && !errors.Is(err, context.Canceled) {
109+
return nil, fmt.Errorf("unable to run git grep: %w, stderr: %s", err, stderr.String())
110+
}
111+
return results, nil
112+
}

modules/git/grep_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Copyright 2024 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package git
5+
6+
import (
7+
"context"
8+
"path/filepath"
9+
"testing"
10+
11+
"github.com/stretchr/testify/assert"
12+
)
13+
14+
func TestGrepSearch(t *testing.T) {
15+
repo, err := openRepositoryWithDefaultContext(filepath.Join(testReposDir, "language_stats_repo"))
16+
assert.NoError(t, err)
17+
defer repo.Close()
18+
19+
res, err := GrepSearch(context.Background(), repo, "void", GrepOptions{})
20+
assert.NoError(t, err)
21+
assert.Equal(t, []*GrepResult{
22+
{
23+
Filename: "java-hello/main.java",
24+
LineNumbers: []int{3},
25+
LineCodes: []string{" public static void main(String[] args)"},
26+
},
27+
{
28+
Filename: "main.vendor.java",
29+
LineNumbers: []int{3},
30+
LineCodes: []string{" public static void main(String[] args)"},
31+
},
32+
}, res)
33+
34+
res, err = GrepSearch(context.Background(), repo, "no-such-content", GrepOptions{})
35+
assert.NoError(t, err)
36+
assert.Len(t, res, 0)
37+
38+
res, err = GrepSearch(context.Background(), &Repository{Path: "no-such-git-repo"}, "no-such-content", GrepOptions{})
39+
assert.Error(t, err)
40+
assert.Len(t, res, 0)
41+
}

modules/indexer/code/search.go

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,27 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error {
7070
return nil
7171
}
7272

73+
func HighlightSearchResultCode(filename string, lineNums []int, code string) []ResultLine {
74+
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
75+
hl, _ := highlight.Code(filename, "", code)
76+
highlightedLines := strings.Split(string(hl), "\n")
77+
78+
// The lineNums outputted by highlight.Code might not match the original lineNums, because "highlight" removes the last `\n`
79+
lines := make([]ResultLine, min(len(highlightedLines), len(lineNums)))
80+
for i := 0; i < len(lines); i++ {
81+
lines[i].Num = lineNums[i]
82+
lines[i].FormattedContent = template.HTML(highlightedLines[i])
83+
}
84+
return lines
85+
}
86+
7387
func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Result, error) {
7488
startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n")
7589

7690
var formattedLinesBuffer bytes.Buffer
7791

7892
contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n")
79-
lines := make([]ResultLine, 0, len(contentLines))
93+
lineNums := make([]int, 0, len(contentLines))
8094
index := startIndex
8195
for i, line := range contentLines {
8296
var err error
@@ -91,37 +105,24 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
91105
line[closeActiveIndex:],
92106
)
93107
} else {
94-
err = writeStrings(&formattedLinesBuffer,
95-
line,
96-
)
108+
err = writeStrings(&formattedLinesBuffer, line)
97109
}
98110
if err != nil {
99111
return nil, err
100112
}
101113

102-
lines = append(lines, ResultLine{Num: startLineNum + i})
114+
lineNums = append(lineNums, startLineNum+i)
103115
index += len(line)
104116
}
105117

106-
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
107-
hl, _ := highlight.Code(result.Filename, "", formattedLinesBuffer.String())
108-
highlightedLines := strings.Split(string(hl), "\n")
109-
110-
// The lines outputted by highlight.Code might not match the original lines, because "highlight" removes the last `\n`
111-
lines = lines[:min(len(highlightedLines), len(lines))]
112-
highlightedLines = highlightedLines[:len(lines)]
113-
for i := 0; i < len(lines); i++ {
114-
lines[i].FormattedContent = template.HTML(highlightedLines[i])
115-
}
116-
117118
return &Result{
118119
RepoID: result.RepoID,
119120
Filename: result.Filename,
120121
CommitID: result.CommitID,
121122
UpdatedUnix: result.UpdatedUnix,
122123
Language: result.Language,
123124
Color: result.Color,
124-
Lines: lines,
125+
Lines: HighlightSearchResultCode(result.Filename, lineNums, formattedLinesBuffer.String()),
125126
}, nil
126127
}
127128

options/locale/locale_en-US.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ org_kind = Search orgs...
172172
team_kind = Search teams...
173173
code_kind = Search code...
174174
code_search_unavailable = Code search is currently not available. Please contact the site administrator.
175+
code_search_by_git_grep = Current code search results are provided by "git grep". There might be better results if the site administrator enables the Repository Indexer.
175176
package_kind = Search packages...
176177
project_kind = Search projects...
177178
branch_kind = Search branches...

0 commit comments

Comments
 (0)