From 9321c271c0a63567c0001276f94614981709ca2f Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 8 Apr 2025 12:06:47 -0700 Subject: [PATCH 01/23] Refactor Attribute --- modules/git/attribute.go | 35 ---- modules/git/attribute/attribute.go | 93 ++++++++++ .../{repo_attribute.go => attribute/batch.go} | 163 +++++------------- .../batch_test.go} | 12 +- modules/git/attribute/checker.go | 92 ++++++++++ .../language_stats.go} | 21 +-- .../language_stats_gogit.go} | 69 ++++---- .../language_stats_nogogit.go} | 85 ++++----- .../language_stats_test.go} | 10 +- modules/git/languagestats/main_test.go | 41 +++++ modules/indexer/stats/db.go | 3 +- routers/web/repo/setting/lfs.go | 5 +- routers/web/repo/view_file.go | 19 +- services/gitdiff/gitdiff.go | 13 +- services/repository/files/content.go | 7 +- services/repository/files/update.go | 5 +- services/repository/files/upload.go | 11 +- 17 files changed, 398 insertions(+), 286 deletions(-) delete mode 100644 modules/git/attribute.go create mode 100644 modules/git/attribute/attribute.go rename modules/git/{repo_attribute.go => attribute/batch.go} (57%) rename modules/git/{repo_attribute_test.go => attribute/batch_test.go} (90%) create mode 100644 modules/git/attribute/checker.go rename modules/git/{repo_language_stats.go => languagestats/language_stats.go} (59%) rename modules/git/{repo_language_stats_gogit.go => languagestats/language_stats_gogit.go} (74%) rename modules/git/{repo_language_stats_nogogit.go => languagestats/language_stats_nogogit.go} (73%) rename modules/git/{repo_language_stats_test.go => languagestats/language_stats_test.go} (75%) create mode 100644 modules/git/languagestats/main_test.go diff --git a/modules/git/attribute.go b/modules/git/attribute.go deleted file mode 100644 index 4dfa510369086..0000000000000 --- a/modules/git/attribute.go +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2024 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package git - -import ( - "code.gitea.io/gitea/modules/optional" -) - -const ( - AttributeLinguistVendored = "linguist-vendored" - AttributeLinguistGenerated = "linguist-generated" - AttributeLinguistDocumentation = "linguist-documentation" - AttributeLinguistDetectable = "linguist-detectable" - AttributeLinguistLanguage = "linguist-language" - AttributeGitlabLanguage = "gitlab-language" -) - -// true if "set"/"true", false if "unset"/"false", none otherwise -func AttributeToBool(attr map[string]string, name string) optional.Option[bool] { - switch attr[name] { - case "set", "true": - return optional.Some(true) - case "unset", "false": - return optional.Some(false) - } - return optional.None[bool]() -} - -func AttributeToString(attr map[string]string, name string) optional.Option[string] { - if value, has := attr[name]; has && value != "unspecified" { - return optional.Some(value) - } - return optional.None[string]() -} diff --git a/modules/git/attribute/attribute.go b/modules/git/attribute/attribute.go new file mode 100644 index 0000000000000..43a1bf3cd8517 --- /dev/null +++ b/modules/git/attribute/attribute.go @@ -0,0 +1,93 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package attribute + +import ( + "strings" + + "code.gitea.io/gitea/modules/optional" +) + +type Attribute string + +const ( + LinguistVendored = "linguist-vendored" + LinguistGenerated = "linguist-generated" + LinguistDocumentation = "linguist-documentation" + LinguistDetectable = "linguist-detectable" + LinguistLanguage = "linguist-language" + GitlabLanguage = "gitlab-language" +) + +func (a Attribute) ToString() optional.Option[string] { + if a != "" && a != "unspecified" { + return optional.Some(string(a)) + } + return optional.None[string]() +} + +// true if "set"/"true", false if "unset"/"false", none otherwise +func (a Attribute) ToBool() optional.Option[bool] { + switch a { + case "set", "true": + return optional.Some(true) + case "unset", "false": + return optional.Some(false) + } + return optional.None[bool]() +} + +type Attributes map[string]Attribute + +func (attrs Attributes) Get(name string) Attribute { + if value, has := attrs[name]; has { + return value + } + return "" +} + +func (attrs Attributes) HasVendored() optional.Option[bool] { + return attrs.Get(LinguistVendored).ToBool() +} + +func (attrs Attributes) HasGenerated() optional.Option[bool] { + return attrs.Get(LinguistGenerated).ToBool() +} + +func (attrs Attributes) HasDocumentation() optional.Option[bool] { + return attrs.Get(LinguistDocumentation).ToBool() +} + +func (attrs Attributes) HasDetectable() optional.Option[bool] { + return attrs.Get(LinguistDetectable).ToBool() +} + +func (attrs Attributes) LinguistLanguage() optional.Option[string] { + return attrs.Get(LinguistLanguage).ToString() +} + +func (attrs Attributes) GitlabLanguage() optional.Option[string] { + attrStr := attrs.Get(GitlabLanguage).ToString() + if attrStr.Has() { + raw := attrStr.Value() + // gitlab-language may have additional parameters after the language + // ignore them and just use the main language + // https://docs.gitlab.com/ee/user/project/highlighting.html#override-syntax-highlighting-for-a-file-type + if idx := strings.IndexByte(raw, '?'); idx >= 0 { + return optional.Some(raw[:idx]) + } + } + return attrStr +} + +func (attrs Attributes) Language() optional.Option[string] { + // prefer linguist-language over gitlab-language + // if linguist-language is not set, use gitlab-language + // if both are not set, return none + language := attrs.LinguistLanguage() + if language.Value() == "" { + language = attrs.GitlabLanguage() + } + return language +} diff --git a/modules/git/repo_attribute.go b/modules/git/attribute/batch.go similarity index 57% rename from modules/git/repo_attribute.go rename to modules/git/attribute/batch.go index fde42d4730c1d..5f255a5fb8203 100644 --- a/modules/git/repo_attribute.go +++ b/modules/git/attribute/batch.go @@ -1,7 +1,7 @@ // Copyright 2019 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT -package git +package attribute import ( "bytes" @@ -13,107 +13,29 @@ import ( "path/filepath" "time" + "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" ) -// CheckAttributeOpts represents the possible options to CheckAttribute -type CheckAttributeOpts struct { - CachedOnly bool - AllAttributes bool - Attributes []string - Filenames []string - IndexFile string - WorkTree string -} - -// CheckAttribute return the Blame object of file -func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) { - env := []string{} - - if len(opts.IndexFile) > 0 { - env = append(env, "GIT_INDEX_FILE="+opts.IndexFile) - } - if len(opts.WorkTree) > 0 { - env = append(env, "GIT_WORK_TREE="+opts.WorkTree) - } - - if len(env) > 0 { - env = append(os.Environ(), env...) - } - - stdOut := new(bytes.Buffer) - stdErr := new(bytes.Buffer) - - cmd := NewCommand("check-attr", "-z") - - if opts.AllAttributes { - cmd.AddArguments("-a") - } else { - for _, attribute := range opts.Attributes { - if attribute != "" { - cmd.AddDynamicArguments(attribute) - } - } - } - - if opts.CachedOnly { - cmd.AddArguments("--cached") - } - - cmd.AddDashesAndList(opts.Filenames...) - - if err := cmd.Run(repo.Ctx, &RunOpts{ - Env: env, - Dir: repo.Path, - Stdout: stdOut, - Stderr: stdErr, - }); err != nil { - return nil, fmt.Errorf("failed to run check-attr: %w\n%s\n%s", err, stdOut.String(), stdErr.String()) - } - - // FIXME: This is incorrect on versions < 1.8.5 - fields := bytes.Split(stdOut.Bytes(), []byte{'\000'}) - - if len(fields)%3 != 1 { - return nil, errors.New("wrong number of fields in return from check-attr") - } - - name2attribute2info := make(map[string]map[string]string) - - for i := 0; i < (len(fields) / 3); i++ { - filename := string(fields[3*i]) - attribute := string(fields[3*i+1]) - info := string(fields[3*i+2]) - attribute2info := name2attribute2info[filename] - if attribute2info == nil { - attribute2info = make(map[string]string) - } - attribute2info[attribute] = info - name2attribute2info[filename] = attribute2info - } - - return name2attribute2info, nil -} - -// CheckAttributeReader provides a reader for check-attribute content that can be long running -type CheckAttributeReader struct { +// BatchChecker provides a reader for check-attribute content that can be long running +type BatchChecker struct { // params Attributes []string - Repo *Repository + Repo *git.Repository IndexFile string WorkTree string stdinReader io.ReadCloser stdinWriter *os.File stdOut *nulSeparatedAttributeWriter - cmd *Command + cmd *git.Command env []string ctx context.Context cancel context.CancelFunc } -// Init initializes the CheckAttributeReader -func (c *CheckAttributeReader) Init(ctx context.Context) error { +// init initializes the AttributeChecker +func (c *BatchChecker) init(ctx context.Context) error { if len(c.Attributes) == 0 { lw := new(nulSeparatedAttributeWriter) lw.attributes = make(chan attributeTriple) @@ -125,7 +47,7 @@ func (c *CheckAttributeReader) Init(ctx context.Context) error { } c.ctx, c.cancel = context.WithCancel(ctx) - c.cmd = NewCommand("check-attr", "--stdin", "-z") + c.cmd = git.NewCommand("check-attr", "--stdin", "-z") if len(c.IndexFile) > 0 { c.cmd.AddArguments("--cached") @@ -155,27 +77,27 @@ func (c *CheckAttributeReader) Init(ctx context.Context) error { return nil } -func (c *CheckAttributeReader) Run() error { +func (c *BatchChecker) run() error { defer func() { _ = c.stdinReader.Close() _ = c.stdOut.Close() }() stdErr := new(bytes.Buffer) - err := c.cmd.Run(c.ctx, &RunOpts{ + err := c.cmd.Run(c.ctx, &git.RunOpts{ Env: c.env, Dir: c.Repo.Path, Stdin: c.stdinReader, Stdout: c.stdOut, Stderr: stdErr, }) - if err != nil && !IsErrCanceledOrKilled(err) { + if err != nil && !git.IsErrCanceledOrKilled(err) { return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String()) } return nil } // CheckPath check attr for given path -func (c *CheckAttributeReader) CheckPath(path string) (rs map[string]string, err error) { +func (c *BatchChecker) CheckPath(path string) (rs Attributes, err error) { defer func() { if err != nil && err != c.ctx.Err() { log.Error("Unexpected error when checking path %s in %s, error: %v", path, filepath.Base(c.Repo.Path), err) @@ -202,14 +124,15 @@ func (c *CheckAttributeReader) CheckPath(path string) (rs map[string]string, err } debugMsg := fmt.Sprintf("check path %q in repo %q", path, filepath.Base(c.Repo.Path)) debugMsg += fmt.Sprintf(", stdOut: tmp=%q, pos=%d, closed=%v", string(c.stdOut.tmp), c.stdOut.pos, stdOutClosed) - if c.cmd.cmd != nil { - debugMsg += fmt.Sprintf(", process state: %q", c.cmd.cmd.ProcessState.String()) - } + // FIXME: + //if c.cmd.cmd != nil { + // debugMsg += fmt.Sprintf(", process state: %q", c.cmd.cmd.ProcessState.String()) + //} _ = c.Close() return fmt.Errorf("CheckPath timeout: %s", debugMsg) } - rs = make(map[string]string) + rs = make(map[string]Attribute) for range c.Attributes { select { case <-time.After(5 * time.Second): @@ -222,7 +145,7 @@ func (c *CheckAttributeReader) CheckPath(path string) (rs map[string]string, err if !ok { return nil, c.ctx.Err() } - rs[attr.Attribute] = attr.Value + rs[attr.Attribute] = Attribute(attr.Value) case <-c.ctx.Done(): return nil, c.ctx.Err() } @@ -230,7 +153,7 @@ func (c *CheckAttributeReader) CheckPath(path string) (rs map[string]string, err return rs, nil } -func (c *CheckAttributeReader) Close() error { +func (c *BatchChecker) Close() error { c.cancel() err := c.stdinWriter.Close() return err @@ -299,43 +222,45 @@ func (wr *nulSeparatedAttributeWriter) Close() error { return nil } -// CheckAttributeReader creates a check attribute reader for the current repository and provided commit ID -func (repo *Repository) CheckAttributeReader(commitID string) (*CheckAttributeReader, context.CancelFunc) { +// NewBatchChecker creates a check attribute reader for the current repository and provided commit ID +func NewBatchChecker(repo *git.Repository, commitID string) (*BatchChecker, context.CancelFunc, error) { indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) if err != nil { - return nil, func() {} + return nil, nil, err } - checker := &CheckAttributeReader{ + checker := &BatchChecker{ Attributes: []string{ - AttributeLinguistVendored, - AttributeLinguistGenerated, - AttributeLinguistDocumentation, - AttributeLinguistDetectable, - AttributeLinguistLanguage, - AttributeGitlabLanguage, + LinguistVendored, + LinguistGenerated, + LinguistDocumentation, + LinguistDetectable, + LinguistLanguage, + GitlabLanguage, }, Repo: repo, IndexFile: indexFilename, WorkTree: worktree, } ctx, cancel := context.WithCancel(repo.Ctx) - if err := checker.Init(ctx); err != nil { - log.Error("Unable to open attribute checker for commit %s, error: %v", commitID, err) - } else { - go func() { - err := checker.Run() - if err != nil && !IsErrCanceledOrKilled(err) { - log.Error("Attribute checker for commit %s exits with error: %v", commitID, err) - } - cancel() - }() - } deferrable := func() { _ = checker.Close() cancel() deleteTemporaryFile() } + if err := checker.init(ctx); err != nil { + log.Error("Unable to open attribute checker for commit %s, error: %v", commitID, err) + deferrable() + return nil, nil, err + } + + go func() { + err := checker.run() + if err != nil && !git.IsErrCanceledOrKilled(err) { + log.Error("Attribute checker for commit %s exits with error: %v", commitID, err) + } + cancel() + }() - return checker, deferrable + return checker, deferrable, nil } diff --git a/modules/git/repo_attribute_test.go b/modules/git/attribute/batch_test.go similarity index 90% rename from modules/git/repo_attribute_test.go rename to modules/git/attribute/batch_test.go index d8fd9f0e8d857..6c67dc3b3babd 100644 --- a/modules/git/repo_attribute_test.go +++ b/modules/git/attribute/batch_test.go @@ -1,7 +1,7 @@ // Copyright 2021 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT -package git +package attribute import ( "testing" @@ -24,7 +24,7 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { select { case attr := <-wr.ReadAttribute(): assert.Equal(t, ".gitignore\"\n", attr.Filename) - assert.Equal(t, AttributeLinguistVendored, attr.Attribute) + assert.Equal(t, LinguistVendored, attr.Attribute) assert.Equal(t, "unspecified", attr.Value) case <-time.After(100 * time.Millisecond): assert.FailNow(t, "took too long to read an attribute from the list") @@ -38,7 +38,7 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { select { case attr := <-wr.ReadAttribute(): assert.Equal(t, ".gitignore\"\n", attr.Filename) - assert.Equal(t, AttributeLinguistVendored, attr.Attribute) + assert.Equal(t, LinguistVendored, attr.Attribute) assert.Equal(t, "unspecified", attr.Value) case <-time.After(100 * time.Millisecond): assert.FailNow(t, "took too long to read an attribute from the list") @@ -77,21 +77,21 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { assert.NoError(t, err) assert.Equal(t, attributeTriple{ Filename: "shouldbe.vendor", - Attribute: AttributeLinguistVendored, + Attribute: LinguistVendored, Value: "set", }, attr) attr = <-wr.ReadAttribute() assert.NoError(t, err) assert.Equal(t, attributeTriple{ Filename: "shouldbe.vendor", - Attribute: AttributeLinguistGenerated, + Attribute: LinguistGenerated, Value: "unspecified", }, attr) attr = <-wr.ReadAttribute() assert.NoError(t, err) assert.Equal(t, attributeTriple{ Filename: "shouldbe.vendor", - Attribute: AttributeLinguistLanguage, + Attribute: LinguistLanguage, Value: "unspecified", }, attr) } diff --git a/modules/git/attribute/checker.go b/modules/git/attribute/checker.go new file mode 100644 index 0000000000000..62a2d138e9612 --- /dev/null +++ b/modules/git/attribute/checker.go @@ -0,0 +1,92 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package attribute + +import ( + "bytes" + "errors" + "fmt" + "os" + + "code.gitea.io/gitea/modules/git" +) + +// CheckAttributeOpts represents the possible options to CheckAttribute +type CheckAttributeOpts struct { + CachedOnly bool + AllAttributes bool + Attributes []string + Filenames []string + IndexFile string + WorkTree string +} + +// CheckAttribute return the Blame object of file +func CheckAttribute(repo *git.Repository, opts CheckAttributeOpts) (map[string]Attributes, error) { + env := []string{} + + if len(opts.IndexFile) > 0 { + env = append(env, "GIT_INDEX_FILE="+opts.IndexFile) + } + if len(opts.WorkTree) > 0 { + env = append(env, "GIT_WORK_TREE="+opts.WorkTree) + } + + if len(env) > 0 { + env = append(os.Environ(), env...) + } + + stdOut := new(bytes.Buffer) + stdErr := new(bytes.Buffer) + + cmd := git.NewCommand("check-attr", "-z") + + if opts.AllAttributes { + cmd.AddArguments("-a") + } else { + for _, attribute := range opts.Attributes { + if attribute != "" { + cmd.AddDynamicArguments(attribute) + } + } + } + + if opts.CachedOnly { + cmd.AddArguments("--cached") + } + + cmd.AddDashesAndList(opts.Filenames...) + + if err := cmd.Run(repo.Ctx, &git.RunOpts{ + Env: env, + Dir: repo.Path, + Stdout: stdOut, + Stderr: stdErr, + }); err != nil { + return nil, fmt.Errorf("failed to run check-attr: %w\n%s\n%s", err, stdOut.String(), stdErr.String()) + } + + // FIXME: This is incorrect on versions < 1.8.5 + fields := bytes.Split(stdOut.Bytes(), []byte{'\000'}) + + if len(fields)%3 != 1 { + return nil, errors.New("wrong number of fields in return from check-attr") + } + + name2attribute2info := make(map[string]Attributes) + + for i := 0; i < (len(fields) / 3); i++ { + filename := string(fields[3*i]) + attribute := string(fields[3*i+1]) + info := string(fields[3*i+2]) + attribute2info := name2attribute2info[filename] + if attribute2info == nil { + attribute2info = make(Attributes) + } + attribute2info[attribute] = Attribute(info) + name2attribute2info[filename] = attribute2info + } + + return name2attribute2info, nil +} diff --git a/modules/git/repo_language_stats.go b/modules/git/languagestats/language_stats.go similarity index 59% rename from modules/git/repo_language_stats.go rename to modules/git/languagestats/language_stats.go index 8551ea9d24e8b..fe6c95d41cf64 100644 --- a/modules/git/repo_language_stats.go +++ b/modules/git/languagestats/language_stats.go @@ -1,13 +1,11 @@ // Copyright 2020 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT -package git +package languagestats import ( "strings" "unicode" - - "code.gitea.io/gitea/modules/optional" ) const ( @@ -48,20 +46,3 @@ func mergeLanguageStats(stats map[string]int64) map[string]int64 { } return res } - -func TryReadLanguageAttribute(attrs map[string]string) optional.Option[string] { - language := AttributeToString(attrs, AttributeLinguistLanguage) - if language.Value() == "" { - language = AttributeToString(attrs, AttributeGitlabLanguage) - if language.Has() { - raw := language.Value() - // gitlab-language may have additional parameters after the language - // ignore them and just use the main language - // https://docs.gitlab.com/ee/user/project/highlighting.html#override-syntax-highlighting-for-a-file-type - if idx := strings.IndexByte(raw, '?'); idx >= 0 { - language = optional.Some(raw[:idx]) - } - } - } - return language -} diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/languagestats/language_stats_gogit.go similarity index 74% rename from modules/git/repo_language_stats_gogit.go rename to modules/git/languagestats/language_stats_gogit.go index a34c03c781f55..0006f8032ccdd 100644 --- a/modules/git/repo_language_stats_gogit.go +++ b/modules/git/languagestats/language_stats_gogit.go @@ -3,7 +3,7 @@ //go:build gogit -package git +package languagestats import ( "bytes" @@ -19,7 +19,7 @@ import ( ) // GetLanguageStats calculates language stats for git repository at specified commit -func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { +func GetLanguageStats(repo *Repository, commitID string) (map[string]int64, error) { r, err := git.PlainOpen(repo.Path) if err != nil { return nil, err @@ -40,7 +40,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } - checker, deferable := repo.CheckAttributeReader(commitID) + checker, deferable, err := NewAttributeChecker(repo, commitID) + if err != nil { + return nil, err + } defer deferable() // sizes contains the current calculated size of all files by language @@ -62,43 +65,41 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err isDocumentation := optional.None[bool]() isDetectable := optional.None[bool]() - if checker != nil { - attrs, err := checker.CheckPath(f.Name) - if err == nil { - isVendored = AttributeToBool(attrs, AttributeLinguistVendored) - if isVendored.ValueOrDefault(false) { - return nil - } - - isGenerated = AttributeToBool(attrs, AttributeLinguistGenerated) - if isGenerated.ValueOrDefault(false) { - return nil - } + attrs, err := checker.CheckPath(f.Name) + if err == nil { + isVendored = attrs.HasVendored() + if isVendored.ValueOrDefault(false) { + return nil + } - isDocumentation = AttributeToBool(attrs, AttributeLinguistDocumentation) - if isDocumentation.ValueOrDefault(false) { - return nil - } + isGenerated = attrs.HasGenerated() + if isGenerated.ValueOrDefault(false) { + return nil + } - isDetectable = AttributeToBool(attrs, AttributeLinguistDetectable) - if !isDetectable.ValueOrDefault(true) { - return nil - } + isDocumentation = attrs.HasDocumentation() + if isDocumentation.ValueOrDefault(false) { + return nil + } - hasLanguage := TryReadLanguageAttribute(attrs) - if hasLanguage.Value() != "" { - language := hasLanguage.Value() + isDetectable = attrs.HasDetectable() + if !isDetectable.ValueOrDefault(true) { + return nil + } - // group languages, such as Pug -> HTML; SCSS -> CSS - group := enry.GetLanguageGroup(language) - if len(group) != 0 { - language = group - } + hasLanguage := attrs.Language() + if hasLanguage.Value() != "" { + language := hasLanguage.Value() - // this language will always be added to the size - sizes[language] += f.Size - return nil + // group languages, such as Pug -> HTML; SCSS -> CSS + group := enry.GetLanguageGroup(language) + if len(group) != 0 { + language = group } + + // this language will always be added to the size + sizes[language] += f.Size + return nil } } diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go similarity index 73% rename from modules/git/repo_language_stats_nogogit.go rename to modules/git/languagestats/language_stats_nogogit.go index de7707bd6cd8b..61ee4e677d7c4 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -3,13 +3,15 @@ //go:build !gogit -package git +package languagestats import ( "bytes" "io" "code.gitea.io/gitea/modules/analyze" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/attribute" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/optional" @@ -17,7 +19,7 @@ import ( ) // GetLanguageStats calculates language stats for git repository at specified commit -func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { +func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) { // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. // so let's create a batch stdin and stdout batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx) @@ -34,19 +36,19 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err if err := writeID(commitID); err != nil { return nil, err } - shaBytes, typ, size, err := ReadBatchLine(batchReader) + shaBytes, typ, size, err := git.ReadBatchLine(batchReader) if typ != "commit" { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) - return nil, ErrNotExist{commitID, ""} + return nil, git.ErrNotExist{ID: commitID} } - sha, err := NewIDFromString(string(shaBytes)) + sha, err := git.NewIDFromString(string(shaBytes)) if err != nil { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) - return nil, ErrNotExist{commitID, ""} + return nil, git.ErrNotExist{ID: commitID} } - commit, err := CommitFromReader(repo, sha, io.LimitReader(batchReader, size)) + commit, err := git.CommitFromReader(repo, sha, io.LimitReader(batchReader, size)) if err != nil { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) return nil, err @@ -62,7 +64,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } - checker, deferable := repo.CheckAttributeReader(commitID) + checker, deferable, err := attribute.NewBatchChecker(repo, commitID) + if err != nil { + return nil, err + } defer deferable() contentBuf := bytes.Buffer{} @@ -96,43 +101,41 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err isDocumentation := optional.None[bool]() isDetectable := optional.None[bool]() - if checker != nil { - attrs, err := checker.CheckPath(f.Name()) - if err == nil { - isVendored = AttributeToBool(attrs, AttributeLinguistVendored) - if isVendored.ValueOrDefault(false) { - continue - } - - isGenerated = AttributeToBool(attrs, AttributeLinguistGenerated) - if isGenerated.ValueOrDefault(false) { - continue - } + attrs, err := checker.CheckPath(f.Name()) + if err == nil { + isVendored = attrs.HasVendored() + if isVendored.ValueOrDefault(false) { + continue + } - isDocumentation = AttributeToBool(attrs, AttributeLinguistDocumentation) - if isDocumentation.ValueOrDefault(false) { - continue - } + isGenerated = attrs.HasGenerated() + if isGenerated.ValueOrDefault(false) { + continue + } - isDetectable = AttributeToBool(attrs, AttributeLinguistDetectable) - if !isDetectable.ValueOrDefault(true) { - continue - } + isDocumentation = attrs.HasDocumentation() + if isDocumentation.ValueOrDefault(false) { + continue + } - hasLanguage := TryReadLanguageAttribute(attrs) - if hasLanguage.Value() != "" { - language := hasLanguage.Value() + isDetectable = attrs.HasDetectable() + if !isDetectable.ValueOrDefault(true) { + continue + } - // group languages, such as Pug -> HTML; SCSS -> CSS - group := enry.GetLanguageGroup(language) - if len(group) != 0 { - language = group - } + hasLanguage := attrs.Language() + if hasLanguage.Value() != "" { + language := hasLanguage.Value() - // this language will always be added to the size - sizes[language] += f.Size() - continue + // group languages, such as Pug -> HTML; SCSS -> CSS + group := enry.GetLanguageGroup(language) + if len(group) != 0 { + language = group } + + // this language will always be added to the size + sizes[language] += f.Size() + continue } } @@ -149,7 +152,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err if err := writeID(f.ID.String()); err != nil { return nil, err } - _, _, size, err := ReadBatchLine(batchReader) + _, _, size, err := git.ReadBatchLine(batchReader) if err != nil { log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err) return nil, err @@ -167,7 +170,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } content = contentBuf.Bytes() - if err := DiscardFull(batchReader, discard); err != nil { + if err := git.DiscardFull(batchReader, discard); err != nil { return nil, err } } diff --git a/modules/git/repo_language_stats_test.go b/modules/git/languagestats/language_stats_test.go similarity index 75% rename from modules/git/repo_language_stats_test.go rename to modules/git/languagestats/language_stats_test.go index 12ce958c6e556..e5967c153efa2 100644 --- a/modules/git/repo_language_stats_test.go +++ b/modules/git/languagestats/language_stats_test.go @@ -3,12 +3,12 @@ //go:build !gogit -package git +package languagestats import ( - "path/filepath" "testing" + "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/setting" "github.com/stretchr/testify/assert" @@ -17,13 +17,13 @@ import ( func TestRepository_GetLanguageStats(t *testing.T) { setting.AppDataPath = t.TempDir() - repoPath := filepath.Join(testReposDir, "language_stats_repo") - gitRepo, err := openRepositoryWithDefaultContext(repoPath) + repoPath := "../tests/repos/language_stats_repo" + gitRepo, err := git.OpenRepository(t.Context(), repoPath) require.NoError(t, err) defer gitRepo.Close() - stats, err := gitRepo.GetLanguageStats("8fee858da5796dfb37704761701bb8e800ad9ef3") + stats, err := GetLanguageStats(gitRepo, "8fee858da5796dfb37704761701bb8e800ad9ef3") require.NoError(t, err) assert.Equal(t, map[string]int64{ diff --git a/modules/git/languagestats/main_test.go b/modules/git/languagestats/main_test.go new file mode 100644 index 0000000000000..707d268c818ef --- /dev/null +++ b/modules/git/languagestats/main_test.go @@ -0,0 +1,41 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package languagestats + +import ( + "context" + "fmt" + "os" + "testing" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/util" +) + +func testRun(m *testing.M) error { + gitHomePath, err := os.MkdirTemp(os.TempDir(), "git-home") + if err != nil { + return fmt.Errorf("unable to create temp dir: %w", err) + } + defer util.RemoveAll(gitHomePath) + setting.Git.HomePath = gitHomePath + + if err = git.InitFull(context.Background()); err != nil { + return fmt.Errorf("failed to call Init: %w", err) + } + + exitCode := m.Run() + if exitCode != 0 { + return fmt.Errorf("run test failed, ExitCode=%d", exitCode) + } + return nil +} + +func TestMain(m *testing.M) { + if err := testRun(m); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "Test failed: %v", err) + os.Exit(1) + } +} diff --git a/modules/indexer/stats/db.go b/modules/indexer/stats/db.go index 067a6f609bdc8..199d493e97d21 100644 --- a/modules/indexer/stats/db.go +++ b/modules/indexer/stats/db.go @@ -8,6 +8,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/languagestats" "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/log" @@ -62,7 +63,7 @@ func (db *DBIndexer) Index(id int64) error { } // Calculate and save language statistics to database - stats, err := gitRepo.GetLanguageStats(commitID) + stats, err := languagestats.GetLanguageStats(gitRepo, commitID) if err != nil { if !setting.IsInTesting { log.Error("Unable to get language stats for ID %s for default branch %s in %s. Error: %v", commitID, repo.DefaultBranch, repo.FullName(), err) diff --git a/routers/web/repo/setting/lfs.go b/routers/web/repo/setting/lfs.go index efda9bda58bd5..8fa21143e3c02 100644 --- a/routers/web/repo/setting/lfs.go +++ b/routers/web/repo/setting/lfs.go @@ -18,6 +18,7 @@ import ( "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/attribute" "code.gitea.io/gitea/modules/git/pipeline" "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/log" @@ -146,7 +147,7 @@ func LFSLocks(ctx *context.Context) { return } - name2attribute2info, err := gitRepo.CheckAttribute(git.CheckAttributeOpts{ + attributesMap, err := attribute.CheckAttribute(gitRepo, attribute.CheckAttributeOpts{ Attributes: []string{"lockable"}, Filenames: filenames, CachedOnly: true, @@ -159,7 +160,7 @@ func LFSLocks(ctx *context.Context) { lockables := make([]bool, len(lfsLocks)) for i, lock := range lfsLocks { - attribute2info, has := name2attribute2info[lock.Path] + attribute2info, has := attributesMap[lock.Path] if !has { continue } diff --git a/routers/web/repo/view_file.go b/routers/web/repo/view_file.go index 12083a1ced188..93a3a9ff73e72 100644 --- a/routers/web/repo/view_file.go +++ b/routers/web/repo/view_file.go @@ -18,6 +18,7 @@ import ( "code.gitea.io/gitea/modules/actions" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/attribute" "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup" @@ -284,14 +285,16 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { } if ctx.Repo.GitRepo != nil { - checker, deferable := ctx.Repo.GitRepo.CheckAttributeReader(ctx.Repo.CommitID) - if checker != nil { - defer deferable() - attrs, err := checker.CheckPath(ctx.Repo.TreePath) - if err == nil { - ctx.Data["IsVendored"] = git.AttributeToBool(attrs, git.AttributeLinguistVendored).Value() - ctx.Data["IsGenerated"] = git.AttributeToBool(attrs, git.AttributeLinguistGenerated).Value() - } + checker, deferable, err := attribute.NewBatchChecker(ctx.Repo.GitRepo, ctx.Repo.CommitID) + if err != nil { + ctx.ServerError("NewAttributeChecker", err) + return + } + defer deferable() + attrs, err := checker.CheckPath(ctx.Repo.TreePath) + if err == nil { + ctx.Data["IsVendored"] = attrs.HasVendored().Value() + ctx.Data["IsGenerated"] = attrs.HasGenerated().Value() } } diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index b9781cf8d067b..2d231e4061bcd 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -25,6 +25,7 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/attribute" "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/log" @@ -1237,7 +1238,10 @@ func GetDiffForRender(ctx context.Context, gitRepo *git.Repository, opts *DiffOp return nil, err } - checker, deferrable := gitRepo.CheckAttributeReader(opts.AfterCommitID) + checker, deferrable, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID) + if err != nil { + return nil, err + } defer deferrable() for _, diffFile := range diff.Files { @@ -1246,10 +1250,9 @@ func GetDiffForRender(ctx context.Context, gitRepo *git.Repository, opts *DiffOp if checker != nil { attrs, err := checker.CheckPath(diffFile.Name) if err == nil { - isVendored = git.AttributeToBool(attrs, git.AttributeLinguistVendored) - isGenerated = git.AttributeToBool(attrs, git.AttributeLinguistGenerated) - - language := git.TryReadLanguageAttribute(attrs) + isVendored = attrs.HasVendored() + isGenerated = attrs.HasGenerated() + language := attrs.Language() if language.Has() { diffFile.Language = language.Value() } diff --git a/services/repository/files/content.go b/services/repository/files/content.go index e23cd1abce610..470b6b46e0013 100644 --- a/services/repository/files/content.go +++ b/services/repository/files/content.go @@ -12,6 +12,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/attribute" "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/setting" api "code.gitea.io/gitea/modules/structs" @@ -287,9 +288,9 @@ func TryGetContentLanguage(gitRepo *git.Repository, commitID, treePath string) ( defer deleteTemporaryFile() - filename2attribute2info, err := gitRepo.CheckAttribute(git.CheckAttributeOpts{ + attributesMap, err := attribute.CheckAttribute(gitRepo, attribute.CheckAttributeOpts{ CachedOnly: true, - Attributes: []string{git.AttributeLinguistLanguage, git.AttributeGitlabLanguage}, + Attributes: []string{attribute.LinguistLanguage, attribute.GitlabLanguage}, Filenames: []string{treePath}, IndexFile: indexFilename, WorkTree: worktree, @@ -298,7 +299,7 @@ func TryGetContentLanguage(gitRepo *git.Repository, commitID, treePath string) ( return "", err } - language := git.TryReadLanguageAttribute(filename2attribute2info[treePath]) + language := attributesMap[treePath].Language() return language.Value(), nil } diff --git a/services/repository/files/update.go b/services/repository/files/update.go index 3f6255e77a77c..9db8c492cdde6 100644 --- a/services/repository/files/update.go +++ b/services/repository/files/update.go @@ -15,6 +15,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/attribute" "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/log" @@ -488,7 +489,7 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file var lfsMetaObject *git_model.LFSMetaObject if setting.LFS.StartServer && hasOldBranch { // Check there is no way this can return multiple infos - filename2attribute2info, err := t.gitRepo.CheckAttribute(git.CheckAttributeOpts{ + attributesMap, err := attribute.CheckAttribute(t.gitRepo, attribute.CheckAttributeOpts{ Attributes: []string{"filter"}, Filenames: []string{file.Options.treePath}, CachedOnly: true, @@ -497,7 +498,7 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file return err } - if filename2attribute2info[file.Options.treePath] != nil && filename2attribute2info[file.Options.treePath]["filter"] == "lfs" { + if attributesMap[file.Options.treePath] != nil && attributesMap[file.Options.treePath]["filter"] == "lfs" { // OK so we are supposed to LFS this data! pointer, err := lfs.GeneratePointer(treeObjectContentReader) if err != nil { diff --git a/services/repository/files/upload.go b/services/repository/files/upload.go index 2e4ed1744ef7c..a514c6335197b 100644 --- a/services/repository/files/upload.go +++ b/services/repository/files/upload.go @@ -14,6 +14,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/attribute" "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/setting" ) @@ -105,9 +106,9 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use } } - var filename2attribute2info map[string]map[string]string + var attributesMap map[string]attribute.Attributes if setting.LFS.StartServer { - filename2attribute2info, err = t.gitRepo.CheckAttribute(git.CheckAttributeOpts{ + attributesMap, err = attribute.CheckAttribute(t.gitRepo, attribute.CheckAttributeOpts{ Attributes: []string{"filter"}, Filenames: names, CachedOnly: true, @@ -119,7 +120,7 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use // Copy uploaded files into repository. for i := range infos { - if err := copyUploadedLFSFileIntoRepository(ctx, &infos[i], filename2attribute2info, t, opts.TreePath); err != nil { + if err := copyUploadedLFSFileIntoRepository(ctx, &infos[i], attributesMap, t, opts.TreePath); err != nil { return err } } @@ -176,7 +177,7 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use return repo_model.DeleteUploads(ctx, uploads...) } -func copyUploadedLFSFileIntoRepository(ctx context.Context, info *uploadInfo, filename2attribute2info map[string]map[string]string, t *TemporaryUploadRepository, treePath string) error { +func copyUploadedLFSFileIntoRepository(ctx context.Context, info *uploadInfo, attributesMap map[string]attribute.Attributes, t *TemporaryUploadRepository, treePath string) error { file, err := os.Open(info.upload.LocalPath()) if err != nil { return err @@ -184,7 +185,7 @@ func copyUploadedLFSFileIntoRepository(ctx context.Context, info *uploadInfo, fi defer file.Close() var objectHash string - if setting.LFS.StartServer && filename2attribute2info[info.upload.Name] != nil && filename2attribute2info[info.upload.Name]["filter"] == "lfs" { + if setting.LFS.StartServer && attributesMap[info.upload.Name] != nil && attributesMap[info.upload.Name]["filter"] == "lfs" { // Handle LFS // FIXME: Inefficient! this should probably happen in models.Upload pointer, err := lfs.GeneratePointer(file) From 44288608a6a2f58ac2c0e5823a2f4dfe9d19172d Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 8 Apr 2025 13:02:48 -0700 Subject: [PATCH 02/23] some improvements --- modules/git/attribute/attribute.go | 9 ++ modules/git/attribute/batch.go | 90 +++++++++---------- .../languagestats/language_stats_nogogit.go | 4 +- routers/web/repo/view_file.go | 5 +- services/gitdiff/gitdiff.go | 22 ++--- 5 files changed, 67 insertions(+), 63 deletions(-) diff --git a/modules/git/attribute/attribute.go b/modules/git/attribute/attribute.go index 43a1bf3cd8517..516fa0d4ded54 100644 --- a/modules/git/attribute/attribute.go +++ b/modules/git/attribute/attribute.go @@ -20,6 +20,15 @@ const ( GitlabLanguage = "gitlab-language" ) +var LinguistAttributes = []string{ + LinguistVendored, + LinguistGenerated, + LinguistDocumentation, + LinguistDetectable, + LinguistLanguage, + GitlabLanguage, +} + func (a Attribute) ToString() optional.Option[string] { if a != "" && a != "unspecified" { return optional.Some(string(a)) diff --git a/modules/git/attribute/batch.go b/modules/git/attribute/batch.go index 5f255a5fb8203..f8071540aaacc 100644 --- a/modules/git/attribute/batch.go +++ b/modules/git/attribute/batch.go @@ -34,6 +34,48 @@ type BatchChecker struct { cancel context.CancelFunc } +// NewBatchChecker creates a check attribute reader for the current repository and provided commit ID +func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) (*BatchChecker, error) { + indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(treeish) + if err != nil { + return nil, err + } + + if len(attributes) == 0 { + attributes = LinguistAttributes + } + + ctx, cancel := context.WithCancel(repo.Ctx) + + checker := &BatchChecker{ + Attributes: attributes, + Repo: repo, + IndexFile: indexFilename, + WorkTree: worktree, + ctx: ctx, + cancel: func() { + cancel() + deleteTemporaryFile() + }, + } + + if err := checker.init(ctx); err != nil { + log.Error("Unable to open attribute checker for commit %s, error: %v", treeish, err) + checker.Close() + return nil, err + } + + go func() { + err := checker.run(ctx) + if err != nil && !git.IsErrCanceledOrKilled(err) { + log.Error("Attribute checker for commit %s exits with error: %v", treeish, err) + } + cancel() + }() + + return checker, nil +} + // init initializes the AttributeChecker func (c *BatchChecker) init(ctx context.Context) error { if len(c.Attributes) == 0 { @@ -46,7 +88,6 @@ func (c *BatchChecker) init(ctx context.Context) error { return errors.New("no provided Attributes to check") } - c.ctx, c.cancel = context.WithCancel(ctx) c.cmd = git.NewCommand("check-attr", "--stdin", "-z") if len(c.IndexFile) > 0 { @@ -77,13 +118,13 @@ func (c *BatchChecker) init(ctx context.Context) error { return nil } -func (c *BatchChecker) run() error { +func (c *BatchChecker) run(ctx context.Context) error { defer func() { _ = c.stdinReader.Close() _ = c.stdOut.Close() }() stdErr := new(bytes.Buffer) - err := c.cmd.Run(c.ctx, &git.RunOpts{ + err := c.cmd.Run(ctx, &git.RunOpts{ Env: c.env, Dir: c.Repo.Path, Stdin: c.stdinReader, @@ -221,46 +262,3 @@ func (wr *nulSeparatedAttributeWriter) Close() error { close(wr.closed) return nil } - -// NewBatchChecker creates a check attribute reader for the current repository and provided commit ID -func NewBatchChecker(repo *git.Repository, commitID string) (*BatchChecker, context.CancelFunc, error) { - indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) - if err != nil { - return nil, nil, err - } - - checker := &BatchChecker{ - Attributes: []string{ - LinguistVendored, - LinguistGenerated, - LinguistDocumentation, - LinguistDetectable, - LinguistLanguage, - GitlabLanguage, - }, - Repo: repo, - IndexFile: indexFilename, - WorkTree: worktree, - } - ctx, cancel := context.WithCancel(repo.Ctx) - deferrable := func() { - _ = checker.Close() - cancel() - deleteTemporaryFile() - } - if err := checker.init(ctx); err != nil { - log.Error("Unable to open attribute checker for commit %s, error: %v", commitID, err) - deferrable() - return nil, nil, err - } - - go func() { - err := checker.run() - if err != nil && !git.IsErrCanceledOrKilled(err) { - log.Error("Attribute checker for commit %s exits with error: %v", commitID, err) - } - cancel() - }() - - return checker, deferrable, nil -} diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index 61ee4e677d7c4..035cd93b0af32 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -64,11 +64,11 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, return nil, err } - checker, deferable, err := attribute.NewBatchChecker(repo, commitID) + checker, err := attribute.NewBatchChecker(repo, commitID) if err != nil { return nil, err } - defer deferable() + defer checker.Close() contentBuf := bytes.Buffer{} var content []byte diff --git a/routers/web/repo/view_file.go b/routers/web/repo/view_file.go index 93a3a9ff73e72..091693bc820c2 100644 --- a/routers/web/repo/view_file.go +++ b/routers/web/repo/view_file.go @@ -285,12 +285,13 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { } if ctx.Repo.GitRepo != nil { - checker, deferable, err := attribute.NewBatchChecker(ctx.Repo.GitRepo, ctx.Repo.CommitID) + checker, err := attribute.NewBatchChecker(ctx.Repo.GitRepo, ctx.Repo.CommitID, attribute.LinguistGenerated, attribute.LinguistVendored) if err != nil { ctx.ServerError("NewAttributeChecker", err) return } - defer deferable() + defer checker.Close() + attrs, err := checker.CheckPath(ctx.Repo.TreePath) if err == nil { ctx.Data["IsVendored"] = attrs.HasVendored().Value() diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 2d231e4061bcd..5d6dac3e7b60b 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -1238,26 +1238,22 @@ func GetDiffForRender(ctx context.Context, gitRepo *git.Repository, opts *DiffOp return nil, err } - checker, deferrable, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID) + checker, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID) if err != nil { return nil, err } - defer deferrable() + defer checker.Close() for _, diffFile := range diff.Files { isVendored := optional.None[bool]() isGenerated := optional.None[bool]() - if checker != nil { - attrs, err := checker.CheckPath(diffFile.Name) - if err == nil { - isVendored = attrs.HasVendored() - isGenerated = attrs.HasGenerated() - language := attrs.Language() - if language.Has() { - diffFile.Language = language.Value() - } - } else { - checker = nil // CheckPath fails, it's not impossible to "check" anymore + attrs, err := checker.CheckPath(diffFile.Name) + if err == nil { + isVendored = attrs.HasVendored() + isGenerated = attrs.HasGenerated() + language := attrs.Language() + if language.Has() { + diffFile.Language = language.Value() } } From 419b959dfe0791087a129a2b26af08bc93ad1d44 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 8 Apr 2025 16:23:53 -0700 Subject: [PATCH 03/23] Support run attr check on bare repository if git version >= 2.40 --- modules/git/attribute/batch.go | 44 +++------ modules/git/attribute/checker.go | 91 ++++++++++--------- modules/git/git.go | 2 + modules/git/languagestats/language_stats.go | 17 ++++ .../git/languagestats/language_stats_gogit.go | 6 +- routers/web/repo/blame.go | 4 +- routers/web/repo/setting/lfs.go | 25 ++--- routers/web/repo/view_file.go | 4 +- services/markup/renderhelper_codepreview.go | 4 +- services/repository/files/content.go | 26 ------ services/repository/files/update.go | 3 +- services/repository/files/upload.go | 3 +- 12 files changed, 99 insertions(+), 130 deletions(-) diff --git a/modules/git/attribute/batch.go b/modules/git/attribute/batch.go index f8071540aaacc..6451bc0d452bf 100644 --- a/modules/git/attribute/batch.go +++ b/modules/git/attribute/batch.go @@ -22,8 +22,7 @@ type BatchChecker struct { // params Attributes []string Repo *git.Repository - IndexFile string - WorkTree string + Treeish string stdinReader io.ReadCloser stdinWriter *os.File @@ -36,27 +35,13 @@ type BatchChecker struct { // NewBatchChecker creates a check attribute reader for the current repository and provided commit ID func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) (*BatchChecker, error) { - indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(treeish) - if err != nil { - return nil, err - } - - if len(attributes) == 0 { - attributes = LinguistAttributes - } - ctx, cancel := context.WithCancel(repo.Ctx) - checker := &BatchChecker{ Attributes: attributes, Repo: repo, - IndexFile: indexFilename, - WorkTree: worktree, + Treeish: treeish, ctx: ctx, - cancel: func() { - cancel() - deleteTemporaryFile() - }, + cancel: cancel, } if err := checker.init(ctx); err != nil { @@ -88,22 +73,19 @@ func (c *BatchChecker) init(ctx context.Context) error { return errors.New("no provided Attributes to check") } - c.cmd = git.NewCommand("check-attr", "--stdin", "-z") - - if len(c.IndexFile) > 0 { - c.cmd.AddArguments("--cached") - c.env = append(c.env, "GIT_INDEX_FILE="+c.IndexFile) + cmd, envs, cancel, err := checkAttrCommand(c.Repo, c.Treeish, nil, c.Attributes) + if err != nil { + c.cancel() + return err } - - if len(c.WorkTree) > 0 { - c.env = append(c.env, "GIT_WORK_TREE="+c.WorkTree) + c.cmd = cmd + c.env = envs + c.cancel = func() { + cancel() + c.cancel() } - c.env = append(c.env, "GIT_FLUSH=1") - - c.cmd.AddDynamicArguments(c.Attributes...) - - var err error + c.cmd.AddArguments("--stdin") c.stdinReader, c.stdinWriter, err = os.Pipe() if err != nil { diff --git a/modules/git/attribute/checker.go b/modules/git/attribute/checker.go index 62a2d138e9612..d878583a0f17c 100644 --- a/modules/git/attribute/checker.go +++ b/modules/git/attribute/checker.go @@ -5,6 +5,7 @@ package attribute import ( "bytes" + "context" "errors" "fmt" "os" @@ -12,81 +13,83 @@ import ( "code.gitea.io/gitea/modules/git" ) -// CheckAttributeOpts represents the possible options to CheckAttribute -type CheckAttributeOpts struct { - CachedOnly bool - AllAttributes bool - Attributes []string - Filenames []string - IndexFile string - WorkTree string -} - -// CheckAttribute return the Blame object of file -func CheckAttribute(repo *git.Repository, opts CheckAttributeOpts) (map[string]Attributes, error) { - env := []string{} - - if len(opts.IndexFile) > 0 { - env = append(env, "GIT_INDEX_FILE="+opts.IndexFile) - } - if len(opts.WorkTree) > 0 { - env = append(env, "GIT_WORK_TREE="+opts.WorkTree) +func checkAttrCommand(gitRepo *git.Repository, treeish string, filenames, attributes []string) (*git.Command, []string, func(), error) { + cmd := git.NewCommand("check-attr", "-z") + if len(attributes) == 0 { + cmd.AddArguments("--all") } - - if len(env) > 0 { - env = append(os.Environ(), env...) + cmd.AddDashesAndList(filenames...) + if git.DefaultFeatures().SupportCheckAttrOnBare && treeish != "" { + cmd.AddArguments("--source") + cmd.AddDynamicArguments(treeish) + cmd.AddDynamicArguments(attributes...) + return cmd, []string{"GIT_FLUSH=1"}, nil, nil } - stdOut := new(bytes.Buffer) - stdErr := new(bytes.Buffer) - - cmd := git.NewCommand("check-attr", "-z") + var cancel func() + var envs []string + if treeish != "" { // if it's empty, then we assume it's a worktree repository + indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(treeish) + if err != nil { + return nil, nil, nil, err + } - if opts.AllAttributes { - cmd.AddArguments("-a") - } else { - for _, attribute := range opts.Attributes { - if attribute != "" { - cmd.AddDynamicArguments(attribute) - } + envs = []string{ + "GIT_INDEX_FILE=" + indexFilename, + "GIT_WORK_TREE=" + worktree, + "GIT_FLUSH=1", } + cancel = deleteTemporaryFile } + cmd.AddArguments("--cached") + cmd.AddDynamicArguments(attributes...) + return cmd, envs, cancel, nil +} + +type CheckAttributeOpts struct { + Filenames []string + Attributes []string +} - if opts.CachedOnly { - cmd.AddArguments("--cached") +// CheckAttribute return the Blame object of file +func CheckAttribute(ctx context.Context, gitRepo *git.Repository, treeish string, opts CheckAttributeOpts) (map[string]Attributes, error) { + cmd, envs, cancel, err := checkAttrCommand(gitRepo, treeish, opts.Filenames, opts.Attributes) + if err != nil { + return nil, err } + defer cancel() - cmd.AddDashesAndList(opts.Filenames...) + stdOut := new(bytes.Buffer) + stdErr := new(bytes.Buffer) - if err := cmd.Run(repo.Ctx, &git.RunOpts{ - Env: env, - Dir: repo.Path, + if err := cmd.Run(ctx, &git.RunOpts{ + Env: append(os.Environ(), envs...), + Dir: gitRepo.Path, Stdout: stdOut, Stderr: stdErr, }); err != nil { return nil, fmt.Errorf("failed to run check-attr: %w\n%s\n%s", err, stdOut.String(), stdErr.String()) } - // FIXME: This is incorrect on versions < 1.8.5 fields := bytes.Split(stdOut.Bytes(), []byte{'\000'}) if len(fields)%3 != 1 { return nil, errors.New("wrong number of fields in return from check-attr") } - name2attribute2info := make(map[string]Attributes) + attributesMap := make(map[string]Attributes) for i := 0; i < (len(fields) / 3); i++ { filename := string(fields[3*i]) attribute := string(fields[3*i+1]) info := string(fields[3*i+2]) - attribute2info := name2attribute2info[filename] + attribute2info := attributesMap[filename] if attribute2info == nil { attribute2info = make(Attributes) } attribute2info[attribute] = Attribute(info) - name2attribute2info[filename] = attribute2info + attributesMap[filename] = attribute2info } - return name2attribute2info, nil + return attributesMap, nil } diff --git a/modules/git/git.go b/modules/git/git.go index 2b593910a2dc0..a2ffd6d289880 100644 --- a/modules/git/git.go +++ b/modules/git/git.go @@ -30,6 +30,7 @@ type Features struct { SupportProcReceive bool // >= 2.29 SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an ‘experimental curiosity’ SupportedObjectFormats []ObjectFormat // sha1, sha256 + SupportCheckAttrOnBare bool // >= 2.40 } var ( @@ -77,6 +78,7 @@ func loadGitVersionFeatures() (*Features, error) { if features.SupportHashSha256 { features.SupportedObjectFormats = append(features.SupportedObjectFormats, Sha256ObjectFormat) } + features.SupportCheckAttrOnBare = features.CheckVersionAtLeast("2.40") return features, nil } diff --git a/modules/git/languagestats/language_stats.go b/modules/git/languagestats/language_stats.go index fe6c95d41cf64..16fd09e03fc9f 100644 --- a/modules/git/languagestats/language_stats.go +++ b/modules/git/languagestats/language_stats.go @@ -4,8 +4,12 @@ package languagestats import ( + "context" "strings" "unicode" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/attribute" ) const ( @@ -46,3 +50,16 @@ func mergeLanguageStats(stats map[string]int64) map[string]int64 { } return res } + +// GetFileLanguage tries to get the (linguist) language of the file content +func GetFileLanguage(ctx context.Context, gitRepo *git.Repository, treeish, treePath string) (string, error) { + attributesMap, err := attribute.CheckAttribute(ctx, gitRepo, treeish, attribute.CheckAttributeOpts{ + Attributes: []string{attribute.LinguistLanguage, attribute.GitlabLanguage}, + Filenames: []string{treePath}, + }) + if err != nil { + return "", err + } + + return attributesMap[treePath].Language().Value(), nil +} diff --git a/modules/git/languagestats/language_stats_gogit.go b/modules/git/languagestats/language_stats_gogit.go index 0006f8032ccdd..1573999d6db33 100644 --- a/modules/git/languagestats/language_stats_gogit.go +++ b/modules/git/languagestats/language_stats_gogit.go @@ -10,6 +10,8 @@ import ( "io" "code.gitea.io/gitea/modules/analyze" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/attribute" "code.gitea.io/gitea/modules/optional" "github.com/go-enry/go-enry/v2" @@ -19,7 +21,7 @@ import ( ) // GetLanguageStats calculates language stats for git repository at specified commit -func GetLanguageStats(repo *Repository, commitID string) (map[string]int64, error) { +func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) { r, err := git.PlainOpen(repo.Path) if err != nil { return nil, err @@ -40,7 +42,7 @@ func GetLanguageStats(repo *Repository, commitID string) (map[string]int64, erro return nil, err } - checker, deferable, err := NewAttributeChecker(repo, commitID) + checker, deferable, err := attribute.NewBatchChecker(repo, commitID) if err != nil { return nil, err } diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go index efd85b9452798..e125267524c7b 100644 --- a/routers/web/repo/blame.go +++ b/routers/web/repo/blame.go @@ -15,13 +15,13 @@ import ( user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/languagestats" "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/templates" "code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/services/context" - files_service "code.gitea.io/gitea/services/repository/files" ) type blameRow struct { @@ -234,7 +234,7 @@ func processBlameParts(ctx *context.Context, blameParts []*git.BlamePart) map[st func renderBlame(ctx *context.Context, blameParts []*git.BlamePart, commitNames map[string]*user_model.UserCommit) { repoLink := ctx.Repo.RepoLink - language, err := files_service.TryGetContentLanguage(ctx.Repo.GitRepo, ctx.Repo.CommitID, ctx.Repo.TreePath) + language, err := languagestats.GetFileLanguage(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, ctx.Repo.TreePath) if err != nil { log.Error("Unable to get file language for %-v:%s. Error: %v", ctx.Repo.Repository, ctx.Repo.TreePath, err) } diff --git a/routers/web/repo/setting/lfs.go b/routers/web/repo/setting/lfs.go index 8fa21143e3c02..052b448eb5a36 100644 --- a/routers/web/repo/setting/lfs.go +++ b/routers/web/repo/setting/lfs.go @@ -135,39 +135,30 @@ func LFSLocks(ctx *context.Context) { } defer gitRepo.Close() - filenames := make([]string, len(lfsLocks)) - - for i, lock := range lfsLocks { - filenames[i] = lock.Path - } - if err := gitRepo.ReadTreeToIndex(ctx.Repo.Repository.DefaultBranch); err != nil { log.Error("Unable to read the default branch to the index: %s (%v)", ctx.Repo.Repository.DefaultBranch, err) ctx.ServerError("LFSLocks", fmt.Errorf("unable to read the default branch to the index: %s (%w)", ctx.Repo.Repository.DefaultBranch, err)) return } - attributesMap, err := attribute.CheckAttribute(gitRepo, attribute.CheckAttributeOpts{ - Attributes: []string{"lockable"}, - Filenames: filenames, - CachedOnly: true, - }) + checker, err := attribute.NewBatchChecker(gitRepo, "", "lockable") if err != nil { log.Error("Unable to check attributes in %s (%v)", tmpBasePath, err) ctx.ServerError("LFSLocks", err) return } + defer checker.Close() lockables := make([]bool, len(lfsLocks)) + filenames := make([]string, len(lfsLocks)) for i, lock := range lfsLocks { - attribute2info, has := attributesMap[lock.Path] - if !has { - continue - } - if attribute2info["lockable"] != "set" { + filenames[i] = lock.Path + attrs, err := checker.CheckPath(lock.Path) + if err != nil { + log.Error("Unable to check attributes in %s: %s (%v)", tmpBasePath, lock.Path, err) continue } - lockables[i] = true + lockables[i] = attrs["lockable"] == "set" } ctx.Data["Lockables"] = lockables diff --git a/routers/web/repo/view_file.go b/routers/web/repo/view_file.go index 091693bc820c2..65f8afdcd6d83 100644 --- a/routers/web/repo/view_file.go +++ b/routers/web/repo/view_file.go @@ -19,6 +19,7 @@ import ( "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/attribute" + "code.gitea.io/gitea/modules/git/languagestats" "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup" @@ -26,7 +27,6 @@ import ( "code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/services/context" issue_service "code.gitea.io/gitea/services/issue" - files_service "code.gitea.io/gitea/services/repository/files" "github.com/nektos/act/pkg/model" ) @@ -210,7 +210,7 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { ctx.Data["NumLines"] = bytes.Count(buf, []byte{'\n'}) + 1 } - language, err := files_service.TryGetContentLanguage(ctx.Repo.GitRepo, ctx.Repo.CommitID, ctx.Repo.TreePath) + language, err := languagestats.GetFileLanguage(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, ctx.Repo.TreePath) if err != nil { log.Error("Unable to get file language for %-v:%s. Error: %v", ctx.Repo.Repository, ctx.Repo.TreePath, err) } diff --git a/services/markup/renderhelper_codepreview.go b/services/markup/renderhelper_codepreview.go index 28d11209846d8..fa1eb824a2f54 100644 --- a/services/markup/renderhelper_codepreview.go +++ b/services/markup/renderhelper_codepreview.go @@ -14,13 +14,13 @@ import ( "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/models/unit" "code.gitea.io/gitea/modules/charset" + "code.gitea.io/gitea/modules/git/languagestats" "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/indexer/code" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" gitea_context "code.gitea.io/gitea/services/context" - "code.gitea.io/gitea/services/repository/files" ) func renderRepoFileCodePreview(ctx context.Context, opts markup.RenderCodePreviewOptions) (template.HTML, error) { @@ -61,7 +61,7 @@ func renderRepoFileCodePreview(ctx context.Context, opts markup.RenderCodePrevie return "", err } - language, _ := files.TryGetContentLanguage(gitRepo, opts.CommitID, opts.FilePath) + language, _ := languagestats.GetFileLanguage(ctx, gitRepo, opts.CommitID, opts.FilePath) blob, err := commit.GetBlobByPath(opts.FilePath) if err != nil { return "", err diff --git a/services/repository/files/content.go b/services/repository/files/content.go index 470b6b46e0013..0327e7f2cebeb 100644 --- a/services/repository/files/content.go +++ b/services/repository/files/content.go @@ -12,7 +12,6 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/git/attribute" "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/setting" api "code.gitea.io/gitea/modules/structs" @@ -278,28 +277,3 @@ func GetBlobBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git Content: content, }, nil } - -// TryGetContentLanguage tries to get the (linguist) language of the file content -func TryGetContentLanguage(gitRepo *git.Repository, commitID, treePath string) (string, error) { - indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(commitID) - if err != nil { - return "", err - } - - defer deleteTemporaryFile() - - attributesMap, err := attribute.CheckAttribute(gitRepo, attribute.CheckAttributeOpts{ - CachedOnly: true, - Attributes: []string{attribute.LinguistLanguage, attribute.GitlabLanguage}, - Filenames: []string{treePath}, - IndexFile: indexFilename, - WorkTree: worktree, - }) - if err != nil { - return "", err - } - - language := attributesMap[treePath].Language() - - return language.Value(), nil -} diff --git a/services/repository/files/update.go b/services/repository/files/update.go index 9db8c492cdde6..bbc3729ec0ce3 100644 --- a/services/repository/files/update.go +++ b/services/repository/files/update.go @@ -489,10 +489,9 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file var lfsMetaObject *git_model.LFSMetaObject if setting.LFS.StartServer && hasOldBranch { // Check there is no way this can return multiple infos - attributesMap, err := attribute.CheckAttribute(t.gitRepo, attribute.CheckAttributeOpts{ + attributesMap, err := attribute.CheckAttribute(ctx, t.gitRepo, "", attribute.CheckAttributeOpts{ Attributes: []string{"filter"}, Filenames: []string{file.Options.treePath}, - CachedOnly: true, }) if err != nil { return err diff --git a/services/repository/files/upload.go b/services/repository/files/upload.go index a514c6335197b..f4f41cca69269 100644 --- a/services/repository/files/upload.go +++ b/services/repository/files/upload.go @@ -108,10 +108,9 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use var attributesMap map[string]attribute.Attributes if setting.LFS.StartServer { - attributesMap, err = attribute.CheckAttribute(t.gitRepo, attribute.CheckAttributeOpts{ + attributesMap, err = attribute.CheckAttribute(ctx, t.gitRepo, "", attribute.CheckAttributeOpts{ Attributes: []string{"filter"}, Filenames: names, - CachedOnly: true, }) if err != nil { return err From ed1601a196605b0f20147cc35696b1c3cb5a92a0 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 8 Apr 2025 18:07:01 -0700 Subject: [PATCH 04/23] Fix builg gogit --- modules/git/languagestats/language_stats_gogit.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/git/languagestats/language_stats_gogit.go b/modules/git/languagestats/language_stats_gogit.go index 1573999d6db33..05a070efc0727 100644 --- a/modules/git/languagestats/language_stats_gogit.go +++ b/modules/git/languagestats/language_stats_gogit.go @@ -10,7 +10,7 @@ import ( "io" "code.gitea.io/gitea/modules/analyze" - "code.gitea.io/gitea/modules/git" + git_module "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/attribute" "code.gitea.io/gitea/modules/optional" @@ -21,7 +21,7 @@ import ( ) // GetLanguageStats calculates language stats for git repository at specified commit -func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) { +func GetLanguageStats(repo *git_module.Repository, commitID string) (map[string]int64, error) { r, err := git.PlainOpen(repo.Path) if err != nil { return nil, err @@ -42,11 +42,11 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, return nil, err } - checker, deferable, err := attribute.NewBatchChecker(repo, commitID) + checker, err := attribute.NewBatchChecker(repo, commitID) if err != nil { return nil, err } - defer deferable() + defer checker.Close() // sizes contains the current calculated size of all files by language sizes := make(map[string]int64) From 0fc7704a301045c797c7dc716224e938da55c8e3 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 8 Apr 2025 18:36:53 -0700 Subject: [PATCH 05/23] Fix lint --- modules/git/attribute/batch.go | 4 ++-- services/gitdiff/gitdiff.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/git/attribute/batch.go b/modules/git/attribute/batch.go index 6451bc0d452bf..975b08059673d 100644 --- a/modules/git/attribute/batch.go +++ b/modules/git/attribute/batch.go @@ -44,7 +44,7 @@ func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) cancel: cancel, } - if err := checker.init(ctx); err != nil { + if err := checker.init(); err != nil { log.Error("Unable to open attribute checker for commit %s, error: %v", treeish, err) checker.Close() return nil, err @@ -62,7 +62,7 @@ func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) } // init initializes the AttributeChecker -func (c *BatchChecker) init(ctx context.Context) error { +func (c *BatchChecker) init() error { if len(c.Attributes) == 0 { lw := new(nulSeparatedAttributeWriter) lw.attributes = make(chan attributeTriple) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 5d6dac3e7b60b..46b525f090b39 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -1238,7 +1238,7 @@ func GetDiffForRender(ctx context.Context, gitRepo *git.Repository, opts *DiffOp return nil, err } - checker, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID) + checker, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID, attribute.LinguistVendored, attribute.LinguistGenerated, attribute.LinguistLanguage, attribute.GitlabLanguage) if err != nil { return nil, err } From 6da7152bc1cd5df8bc1954d60462d5fdaf84e6e1 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 8 Apr 2025 22:16:10 -0700 Subject: [PATCH 06/23] Fix bug --- modules/git/attribute/checker.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/git/attribute/checker.go b/modules/git/attribute/checker.go index d878583a0f17c..38fac9789f288 100644 --- a/modules/git/attribute/checker.go +++ b/modules/git/attribute/checker.go @@ -19,14 +19,14 @@ func checkAttrCommand(gitRepo *git.Repository, treeish string, filenames, attrib cmd.AddArguments("--all") } cmd.AddDashesAndList(filenames...) + cancel := func() {} if git.DefaultFeatures().SupportCheckAttrOnBare && treeish != "" { cmd.AddArguments("--source") cmd.AddDynamicArguments(treeish) cmd.AddDynamicArguments(attributes...) - return cmd, []string{"GIT_FLUSH=1"}, nil, nil + return cmd, []string{"GIT_FLUSH=1"}, cancel, nil } - var cancel func() var envs []string if treeish != "" { // if it's empty, then we assume it's a worktree repository indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(treeish) From 2a3571de281b62645e734b88f5774901397e4886 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 9 Apr 2025 12:22:29 -0700 Subject: [PATCH 07/23] Fix bug --- modules/git/attribute/batch.go | 3 +++ modules/git/attribute/checker.go | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/git/attribute/batch.go b/modules/git/attribute/batch.go index 975b08059673d..e190c2f1fa614 100644 --- a/modules/git/attribute/batch.go +++ b/modules/git/attribute/batch.go @@ -36,6 +36,9 @@ type BatchChecker struct { // NewBatchChecker creates a check attribute reader for the current repository and provided commit ID func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) (*BatchChecker, error) { ctx, cancel := context.WithCancel(repo.Ctx) + if len(attributes) == 0 { + attributes = LinguistAttributes + } checker := &BatchChecker{ Attributes: attributes, Repo: repo, diff --git a/modules/git/attribute/checker.go b/modules/git/attribute/checker.go index 38fac9789f288..7123fb3befc6b 100644 --- a/modules/git/attribute/checker.go +++ b/modules/git/attribute/checker.go @@ -18,7 +18,9 @@ func checkAttrCommand(gitRepo *git.Repository, treeish string, filenames, attrib if len(attributes) == 0 { cmd.AddArguments("--all") } - cmd.AddDashesAndList(filenames...) + if len(filenames) > 0 { + cmd.AddDashesAndList(filenames...) + } cancel := func() {} if git.DefaultFeatures().SupportCheckAttrOnBare && treeish != "" { cmd.AddArguments("--source") From 33b1cebdd63c66c0077e306a01dcc5760bf683e1 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 9 Apr 2025 15:59:20 -0700 Subject: [PATCH 08/23] Fix bug --- modules/git/attribute/batch.go | 67 +++++++++++----------------------- 1 file changed, 22 insertions(+), 45 deletions(-) diff --git a/modules/git/attribute/batch.go b/modules/git/attribute/batch.go index e190c2f1fa614..eecc0454f2820 100644 --- a/modules/git/attribute/batch.go +++ b/modules/git/attribute/batch.go @@ -6,7 +6,6 @@ package attribute import ( "bytes" "context" - "errors" "fmt" "io" "os" @@ -39,70 +38,48 @@ func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) if len(attributes) == 0 { attributes = LinguistAttributes } + cmd, envs, cleanup, err := checkAttrCommand(repo, treeish, nil, attributes) + if err != nil { + cancel() + return nil, err + } + cmd.AddArguments("--stdin") + checker := &BatchChecker{ Attributes: attributes, Repo: repo, Treeish: treeish, ctx: ctx, - cancel: cancel, + cancel: func() { + cancel() + cleanup() + }, + cmd: cmd, + env: envs, } - if err := checker.init(); err != nil { - log.Error("Unable to open attribute checker for commit %s, error: %v", treeish, err) - checker.Close() + checker.stdinReader, checker.stdinWriter, err = os.Pipe() + if err != nil { + checker.cancel() return nil, err } + lw := new(nulSeparatedAttributeWriter) + lw.attributes = make(chan attributeTriple, 5) + lw.closed = make(chan struct{}) + checker.stdOut = lw + go func() { err := checker.run(ctx) if err != nil && !git.IsErrCanceledOrKilled(err) { log.Error("Attribute checker for commit %s exits with error: %v", treeish, err) } - cancel() + checker.cancel() }() return checker, nil } -// init initializes the AttributeChecker -func (c *BatchChecker) init() error { - if len(c.Attributes) == 0 { - lw := new(nulSeparatedAttributeWriter) - lw.attributes = make(chan attributeTriple) - lw.closed = make(chan struct{}) - - c.stdOut = lw - c.stdOut.Close() - return errors.New("no provided Attributes to check") - } - - cmd, envs, cancel, err := checkAttrCommand(c.Repo, c.Treeish, nil, c.Attributes) - if err != nil { - c.cancel() - return err - } - c.cmd = cmd - c.env = envs - c.cancel = func() { - cancel() - c.cancel() - } - - c.cmd.AddArguments("--stdin") - - c.stdinReader, c.stdinWriter, err = os.Pipe() - if err != nil { - c.cancel() - return err - } - - lw := new(nulSeparatedAttributeWriter) - lw.attributes = make(chan attributeTriple, 5) - lw.closed = make(chan struct{}) - c.stdOut = lw - return nil -} - func (c *BatchChecker) run(ctx context.Context) error { defer func() { _ = c.stdinReader.Close() From 1f473af6e1173de266cd0621252a838194f4531a Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 9 Apr 2025 17:43:46 -0700 Subject: [PATCH 09/23] Fix bug --- modules/git/attribute/checker.go | 43 ++++++++++++++++---------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/modules/git/attribute/checker.go b/modules/git/attribute/checker.go index 7123fb3befc6b..45049ffdfd503 100644 --- a/modules/git/attribute/checker.go +++ b/modules/git/attribute/checker.go @@ -18,33 +18,34 @@ func checkAttrCommand(gitRepo *git.Repository, treeish string, filenames, attrib if len(attributes) == 0 { cmd.AddArguments("--all") } - if len(filenames) > 0 { - cmd.AddDashesAndList(filenames...) - } cancel := func() {} - if git.DefaultFeatures().SupportCheckAttrOnBare && treeish != "" { - cmd.AddArguments("--source") - cmd.AddDynamicArguments(treeish) - cmd.AddDynamicArguments(attributes...) - return cmd, []string{"GIT_FLUSH=1"}, cancel, nil - } + envs := []string{"GIT_FLUSH=1"} - var envs []string - if treeish != "" { // if it's empty, then we assume it's a worktree repository - indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(treeish) - if err != nil { - return nil, nil, nil, err - } + // if there is a treeish, we asssume this is a bare repository + // if it's empty, then we assume it's a worktree repository + if treeish != "" { + if git.DefaultFeatures().SupportCheckAttrOnBare { + cmd.AddArguments("--source") + cmd.AddDynamicArguments(treeish) + } else { + indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(treeish) + if err != nil { + return nil, nil, nil, err + } - envs = []string{ - "GIT_INDEX_FILE=" + indexFilename, - "GIT_WORK_TREE=" + worktree, - "GIT_FLUSH=1", + cmd.AddArguments("--cached") + envs = append(envs, + "GIT_INDEX_FILE="+indexFilename, + "GIT_WORK_TREE="+worktree, + ) + cancel = deleteTemporaryFile } - cancel = deleteTemporaryFile } - cmd.AddArguments("--cached") + cmd.AddDynamicArguments(attributes...) + if len(filenames) > 0 { + cmd.AddDashesAndList(filenames...) + } return cmd, envs, cancel, nil } From 1c4821239564f13c17f7d8f037ce4baef2c06a5b Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 9 Apr 2025 17:54:43 -0700 Subject: [PATCH 10/23] Fix test --- modules/git/attribute/checker.go | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/modules/git/attribute/checker.go b/modules/git/attribute/checker.go index 45049ffdfd503..7853de5936d58 100644 --- a/modules/git/attribute/checker.go +++ b/modules/git/attribute/checker.go @@ -9,20 +9,33 @@ import ( "errors" "fmt" "os" + "strconv" "code.gitea.io/gitea/modules/git" ) func checkAttrCommand(gitRepo *git.Repository, treeish string, filenames, attributes []string) (*git.Command, []string, func(), error) { + cancel := func() {} + envs := []string{"GIT_FLUSH=1"} + + // Check if the repository is bare + res, _, err := git.NewCommand("rev-parse", "--is-bare-repository").RunStdString(gitRepo.Ctx, &git.RunOpts{ + Dir: gitRepo.Path, + }) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to run rev-parse: %w", err) + } + isBare, _ := strconv.ParseBool(res) + // bare repository must have a treeish + if isBare && treeish == "" { + return nil, nil, nil, fmt.Errorf("bare repository must have a treeish") + } + cmd := git.NewCommand("check-attr", "-z") if len(attributes) == 0 { cmd.AddArguments("--all") } - cancel := func() {} - envs := []string{"GIT_FLUSH=1"} - // if there is a treeish, we asssume this is a bare repository - // if it's empty, then we assume it's a worktree repository if treeish != "" { if git.DefaultFeatures().SupportCheckAttrOnBare { cmd.AddArguments("--source") From f98e2c7ff2a51e18efb25e197037a08ef9f63a80 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 9 Apr 2025 18:12:10 -0700 Subject: [PATCH 11/23] Fix lint --- modules/git/attribute/checker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/git/attribute/checker.go b/modules/git/attribute/checker.go index 7853de5936d58..889f9125af7e1 100644 --- a/modules/git/attribute/checker.go +++ b/modules/git/attribute/checker.go @@ -28,7 +28,7 @@ func checkAttrCommand(gitRepo *git.Repository, treeish string, filenames, attrib isBare, _ := strconv.ParseBool(res) // bare repository must have a treeish if isBare && treeish == "" { - return nil, nil, nil, fmt.Errorf("bare repository must have a treeish") + return nil, nil, nil, errors.New("bare repository must have a treeish") } cmd := git.NewCommand("check-attr", "-z") From bdfb061041f94ed9e94aae35bd53fc2ad27d4a99 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 9 Apr 2025 20:09:15 -0700 Subject: [PATCH 12/23] Correct tests repository under git --- modules/git/tests/repos/language_stats_repo/config | 2 +- modules/git/tests/repos/repo3_notes/config | 2 +- modules/git/tests/repos/repo4_commitsbetween/config | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/git/tests/repos/language_stats_repo/config b/modules/git/tests/repos/language_stats_repo/config index 515f4836297fd..a4ef456cbc233 100644 --- a/modules/git/tests/repos/language_stats_repo/config +++ b/modules/git/tests/repos/language_stats_repo/config @@ -1,5 +1,5 @@ [core] repositoryformatversion = 0 filemode = true - bare = false + bare = true logallrefupdates = true diff --git a/modules/git/tests/repos/repo3_notes/config b/modules/git/tests/repos/repo3_notes/config index d545cdabdbdda..5ed22e23d15d7 100644 --- a/modules/git/tests/repos/repo3_notes/config +++ b/modules/git/tests/repos/repo3_notes/config @@ -1,7 +1,7 @@ [core] repositoryformatversion = 0 filemode = false - bare = false + bare = true logallrefupdates = true symlinks = false ignorecase = true diff --git a/modules/git/tests/repos/repo4_commitsbetween/config b/modules/git/tests/repos/repo4_commitsbetween/config index d545cdabdbdda..5ed22e23d15d7 100644 --- a/modules/git/tests/repos/repo4_commitsbetween/config +++ b/modules/git/tests/repos/repo4_commitsbetween/config @@ -1,7 +1,7 @@ [core] repositoryformatversion = 0 filemode = false - bare = false + bare = true logallrefupdates = true symlinks = false ignorecase = true From 2d8c956ddf71ff816fa98600f33f41ab198397be Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 10 Apr 2025 19:18:18 -0700 Subject: [PATCH 13/23] Add tests --- modules/git/attribute/attribute.go | 6 +- modules/git/attribute/attribute_test.go | 35 +++++++++ modules/git/attribute/batch.go | 60 ++++++--------- modules/git/attribute/batch_test.go | 70 ++++++++++++++++++ modules/git/attribute/checker.go | 6 +- modules/git/attribute/checker_test.go | 74 +++++++++++++++++++ modules/git/attribute/main_test.go | 41 ++++++++++ modules/git/languagestats/language_stats.go | 2 +- .../git/languagestats/language_stats_test.go | 1 - services/repository/files/update.go | 2 +- services/repository/files/upload.go | 2 +- 11 files changed, 253 insertions(+), 46 deletions(-) create mode 100644 modules/git/attribute/attribute_test.go create mode 100644 modules/git/attribute/checker_test.go create mode 100644 modules/git/attribute/main_test.go diff --git a/modules/git/attribute/attribute.go b/modules/git/attribute/attribute.go index 516fa0d4ded54..df03687ef1d93 100644 --- a/modules/git/attribute/attribute.go +++ b/modules/git/attribute/attribute.go @@ -29,8 +29,12 @@ var LinguistAttributes = []string{ GitlabLanguage, } +func (a Attribute) IsUnspecified() bool { + return a == "" || a == "unspecified" +} + func (a Attribute) ToString() optional.Option[string] { - if a != "" && a != "unspecified" { + if !a.IsUnspecified() { return optional.Some(string(a)) } return optional.None[string]() diff --git a/modules/git/attribute/attribute_test.go b/modules/git/attribute/attribute_test.go new file mode 100644 index 0000000000000..bf5e2b70ed40e --- /dev/null +++ b/modules/git/attribute/attribute_test.go @@ -0,0 +1,35 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package attribute + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_Attribute(t *testing.T) { + assert.Empty(t, Attribute("").ToString().Value()) + assert.Empty(t, Attribute("unspecified").ToString().Value()) + assert.Equal(t, "python", Attribute("python").ToString().Value()) + assert.Equal(t, "Java", Attribute("Java").ToString().Value()) + + attributes := Attributes{ + LinguistGenerated: "true", + LinguistDocumentation: "false", + LinguistDetectable: "set", + LinguistLanguage: "Python", + GitlabLanguage: "Java", + "filter": "unspecified", + "test": "", + } + + assert.Empty(t, attributes.Get("test").ToString().Value()) + assert.Empty(t, attributes.Get("filter").ToString().Value()) + assert.Equal(t, "Python", attributes.Get(LinguistLanguage).ToString().Value()) + assert.Equal(t, "Java", attributes.Get(GitlabLanguage).ToString().Value()) + assert.True(t, attributes.Get(LinguistGenerated).ToBool().Value()) + assert.False(t, attributes.Get(LinguistDocumentation).ToBool().Value()) + assert.True(t, attributes.Get(LinguistDetectable).ToBool().Value()) +} diff --git a/modules/git/attribute/batch.go b/modules/git/attribute/batch.go index eecc0454f2820..cb2366446bb1d 100644 --- a/modules/git/attribute/batch.go +++ b/modules/git/attribute/batch.go @@ -7,7 +7,6 @@ import ( "bytes" "context" "fmt" - "io" "os" "path/filepath" "time" @@ -19,15 +18,11 @@ import ( // BatchChecker provides a reader for check-attribute content that can be long running type BatchChecker struct { // params - Attributes []string - Repo *git.Repository - Treeish string + attributesNum int + repo *git.Repository - stdinReader io.ReadCloser stdinWriter *os.File stdOut *nulSeparatedAttributeWriter - cmd *git.Command - env []string ctx context.Context cancel context.CancelFunc } @@ -46,23 +41,21 @@ func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) cmd.AddArguments("--stdin") checker := &BatchChecker{ - Attributes: attributes, - Repo: repo, - Treeish: treeish, - ctx: ctx, + attributesNum: len(attributes), + repo: repo, + ctx: ctx, cancel: func() { cancel() cleanup() }, - cmd: cmd, - env: envs, } - checker.stdinReader, checker.stdinWriter, err = os.Pipe() + stdinReader, stdinWriter, err := os.Pipe() if err != nil { checker.cancel() return nil, err } + checker.stdinWriter = stdinWriter lw := new(nulSeparatedAttributeWriter) lw.attributes = make(chan attributeTriple, 5) @@ -70,7 +63,19 @@ func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) checker.stdOut = lw go func() { - err := checker.run(ctx) + defer func() { + _ = stdinReader.Close() + _ = lw.Close() + }() + stdErr := new(bytes.Buffer) + err := cmd.Run(ctx, &git.RunOpts{ + Env: envs, + Dir: repo.Path, + Stdin: stdinReader, + Stdout: lw, + Stderr: stdErr, + }) + if err != nil && !git.IsErrCanceledOrKilled(err) { log.Error("Attribute checker for commit %s exits with error: %v", treeish, err) } @@ -80,30 +85,11 @@ func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) return checker, nil } -func (c *BatchChecker) run(ctx context.Context) error { - defer func() { - _ = c.stdinReader.Close() - _ = c.stdOut.Close() - }() - stdErr := new(bytes.Buffer) - err := c.cmd.Run(ctx, &git.RunOpts{ - Env: c.env, - Dir: c.Repo.Path, - Stdin: c.stdinReader, - Stdout: c.stdOut, - Stderr: stdErr, - }) - if err != nil && !git.IsErrCanceledOrKilled(err) { - return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String()) - } - return nil -} - // CheckPath check attr for given path func (c *BatchChecker) CheckPath(path string) (rs Attributes, err error) { defer func() { if err != nil && err != c.ctx.Err() { - log.Error("Unexpected error when checking path %s in %s, error: %v", path, filepath.Base(c.Repo.Path), err) + log.Error("Unexpected error when checking path %s in %s, error: %v", path, filepath.Base(c.repo.Path), err) } }() @@ -125,7 +111,7 @@ func (c *BatchChecker) CheckPath(path string) (rs Attributes, err error) { stdOutClosed = true default: } - debugMsg := fmt.Sprintf("check path %q in repo %q", path, filepath.Base(c.Repo.Path)) + debugMsg := fmt.Sprintf("check path %q in repo %q", path, filepath.Base(c.repo.Path)) debugMsg += fmt.Sprintf(", stdOut: tmp=%q, pos=%d, closed=%v", string(c.stdOut.tmp), c.stdOut.pos, stdOutClosed) // FIXME: //if c.cmd.cmd != nil { @@ -136,7 +122,7 @@ func (c *BatchChecker) CheckPath(path string) (rs Attributes, err error) { } rs = make(map[string]Attribute) - for range c.Attributes { + for i := 0; i < c.attributesNum; i++ { select { case <-time.After(5 * time.Second): // There is a strange "hang" problem in gitdiff.GetDiff -> CheckPath diff --git a/modules/git/attribute/batch_test.go b/modules/git/attribute/batch_test.go index 6c67dc3b3babd..4e990e389cf9f 100644 --- a/modules/git/attribute/batch_test.go +++ b/modules/git/attribute/batch_test.go @@ -4,10 +4,15 @@ package attribute import ( + "path/filepath" "testing" "time" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { @@ -95,3 +100,68 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { Value: "unspecified", }, attr) } + +var expectedAttrs = Attributes{ + LinguistGenerated: "unspecified", + LinguistDetectable: "unspecified", + LinguistDocumentation: "unspecified", + LinguistVendored: "unspecified", + LinguistLanguage: "Python", + GitlabLanguage: "unspecified", +} + +func Test_BatchChecker(t *testing.T) { + setting.AppDataPath = t.TempDir() + repoPath := "../tests/repos/language_stats_repo" + gitRepo, err := git.OpenRepository(t.Context(), repoPath) + require.NoError(t, err) + defer gitRepo.Close() + + commitID := "8fee858da5796dfb37704761701bb8e800ad9ef3" + + t.Run("Create index file to run git check-attr", func(t *testing.T) { + defer test.MockVariableValue(&git.DefaultFeatures().SupportCheckAttrOnBare, false)() + checker, err := NewBatchChecker(gitRepo, commitID, LinguistAttributes...) + assert.NoError(t, err) + defer checker.Close() + attributes, err := checker.CheckPath("i-am-a-python.p") + assert.NoError(t, err) + assert.Equal(t, expectedAttrs, attributes) + }) + + // run git check-attr on work tree + t.Run("Run git check-attr on git work tree", func(t *testing.T) { + dir := filepath.Join(t.TempDir(), "test-repo") + err := git.Clone(t.Context(), repoPath, dir, git.CloneRepoOptions{ + Shared: true, + Branch: "master", + }) + assert.NoError(t, err) + + tempRepo, err := git.OpenRepository(t.Context(), dir) + assert.NoError(t, err) + defer tempRepo.Close() + + checker, err := NewBatchChecker(tempRepo, "", LinguistAttributes...) + assert.NoError(t, err) + defer checker.Close() + attributes, err := checker.CheckPath("i-am-a-python.p") + assert.NoError(t, err) + assert.Equal(t, expectedAttrs, attributes) + }) + + if !git.DefaultFeatures().SupportCheckAttrOnBare { + t.Skip("git version 2.40 is required to support run check-attr on bare repo") + return + } + + t.Run("Run git check-attr in bare repository", func(t *testing.T) { + checker, err := NewBatchChecker(gitRepo, commitID, LinguistAttributes...) + assert.NoError(t, err) + defer checker.Close() + + attributes, err := checker.CheckPath("i-am-a-python.p") + assert.NoError(t, err) + assert.Equal(t, expectedAttrs, attributes) + }) +} diff --git a/modules/git/attribute/checker.go b/modules/git/attribute/checker.go index 889f9125af7e1..dc98ddd5a066b 100644 --- a/modules/git/attribute/checker.go +++ b/modules/git/attribute/checker.go @@ -67,8 +67,8 @@ type CheckAttributeOpts struct { Attributes []string } -// CheckAttribute return the Blame object of file -func CheckAttribute(ctx context.Context, gitRepo *git.Repository, treeish string, opts CheckAttributeOpts) (map[string]Attributes, error) { +// CheckAttributes return the attributes of the given filenames and attributes in the given treeish. +func CheckAttributes(ctx context.Context, gitRepo *git.Repository, treeish string, opts CheckAttributeOpts) (map[string]Attributes, error) { cmd, envs, cancel, err := checkAttrCommand(gitRepo, treeish, opts.Filenames, opts.Attributes) if err != nil { return nil, err @@ -88,13 +88,11 @@ func CheckAttribute(ctx context.Context, gitRepo *git.Repository, treeish string } fields := bytes.Split(stdOut.Bytes(), []byte{'\000'}) - if len(fields)%3 != 1 { return nil, errors.New("wrong number of fields in return from check-attr") } attributesMap := make(map[string]Attributes) - for i := 0; i < (len(fields) / 3); i++ { filename := string(fields[3*i]) attribute := string(fields[3*i+1]) diff --git a/modules/git/attribute/checker_test.go b/modules/git/attribute/checker_test.go new file mode 100644 index 0000000000000..661b87c2b27da --- /dev/null +++ b/modules/git/attribute/checker_test.go @@ -0,0 +1,74 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package attribute + +import ( + "path/filepath" + "testing" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func Test_Checker(t *testing.T) { + setting.AppDataPath = t.TempDir() + repoPath := "../tests/repos/language_stats_repo" + gitRepo, err := git.OpenRepository(t.Context(), repoPath) + require.NoError(t, err) + defer gitRepo.Close() + + commitID := "8fee858da5796dfb37704761701bb8e800ad9ef3" + + t.Run("Create index file to run git check-attr", func(t *testing.T) { + defer test.MockVariableValue(&git.DefaultFeatures().SupportCheckAttrOnBare, false)() + attrs, err := CheckAttributes(t.Context(), gitRepo, commitID, CheckAttributeOpts{ + Filenames: []string{"i-am-a-python.p"}, + Attributes: LinguistAttributes, + }) + assert.NoError(t, err) + assert.Len(t, attrs, 1) + assert.Equal(t, expectedAttrs, attrs["i-am-a-python.p"]) + }) + + // run git check-attr on work tree + t.Run("Run git check-attr on git work tree", func(t *testing.T) { + dir := filepath.Join(t.TempDir(), "test-repo") + err := git.Clone(t.Context(), repoPath, dir, git.CloneRepoOptions{ + Shared: true, + Branch: "master", + }) + assert.NoError(t, err) + + tempRepo, err := git.OpenRepository(t.Context(), dir) + assert.NoError(t, err) + defer tempRepo.Close() + + attrs, err := CheckAttributes(t.Context(), tempRepo, "", CheckAttributeOpts{ + Filenames: []string{"i-am-a-python.p"}, + Attributes: LinguistAttributes, + }) + assert.NoError(t, err) + assert.Len(t, attrs, 1) + assert.Equal(t, expectedAttrs, attrs["i-am-a-python.p"]) + }) + + if !git.DefaultFeatures().SupportCheckAttrOnBare { + t.Skip("git version 2.40 is required to support run check-attr on bare repo") + return + } + + t.Run("Run git check-attr in bare repository", func(t *testing.T) { + attrs, err := CheckAttributes(t.Context(), gitRepo, commitID, CheckAttributeOpts{ + Filenames: []string{"i-am-a-python.p"}, + Attributes: LinguistAttributes, + }) + assert.NoError(t, err) + assert.Len(t, attrs, 1) + assert.Equal(t, expectedAttrs, attrs["i-am-a-python.p"]) + }) +} diff --git a/modules/git/attribute/main_test.go b/modules/git/attribute/main_test.go new file mode 100644 index 0000000000000..df8241bfb08d4 --- /dev/null +++ b/modules/git/attribute/main_test.go @@ -0,0 +1,41 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package attribute + +import ( + "context" + "fmt" + "os" + "testing" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/util" +) + +func testRun(m *testing.M) error { + gitHomePath, err := os.MkdirTemp(os.TempDir(), "git-home") + if err != nil { + return fmt.Errorf("unable to create temp dir: %w", err) + } + defer util.RemoveAll(gitHomePath) + setting.Git.HomePath = gitHomePath + + if err = git.InitFull(context.Background()); err != nil { + return fmt.Errorf("failed to call Init: %w", err) + } + + exitCode := m.Run() + if exitCode != 0 { + return fmt.Errorf("run test failed, ExitCode=%d", exitCode) + } + return nil +} + +func TestMain(m *testing.M) { + if err := testRun(m); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "Test failed: %v", err) + os.Exit(1) + } +} diff --git a/modules/git/languagestats/language_stats.go b/modules/git/languagestats/language_stats.go index 16fd09e03fc9f..3b336b6a1dff7 100644 --- a/modules/git/languagestats/language_stats.go +++ b/modules/git/languagestats/language_stats.go @@ -53,7 +53,7 @@ func mergeLanguageStats(stats map[string]int64) map[string]int64 { // GetFileLanguage tries to get the (linguist) language of the file content func GetFileLanguage(ctx context.Context, gitRepo *git.Repository, treeish, treePath string) (string, error) { - attributesMap, err := attribute.CheckAttribute(ctx, gitRepo, treeish, attribute.CheckAttributeOpts{ + attributesMap, err := attribute.CheckAttributes(ctx, gitRepo, treeish, attribute.CheckAttributeOpts{ Attributes: []string{attribute.LinguistLanguage, attribute.GitlabLanguage}, Filenames: []string{treePath}, }) diff --git a/modules/git/languagestats/language_stats_test.go b/modules/git/languagestats/language_stats_test.go index e5967c153efa2..b908ae6413d72 100644 --- a/modules/git/languagestats/language_stats_test.go +++ b/modules/git/languagestats/language_stats_test.go @@ -20,7 +20,6 @@ func TestRepository_GetLanguageStats(t *testing.T) { repoPath := "../tests/repos/language_stats_repo" gitRepo, err := git.OpenRepository(t.Context(), repoPath) require.NoError(t, err) - defer gitRepo.Close() stats, err := GetLanguageStats(gitRepo, "8fee858da5796dfb37704761701bb8e800ad9ef3") diff --git a/services/repository/files/update.go b/services/repository/files/update.go index bbc3729ec0ce3..39d82542e33f2 100644 --- a/services/repository/files/update.go +++ b/services/repository/files/update.go @@ -489,7 +489,7 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file var lfsMetaObject *git_model.LFSMetaObject if setting.LFS.StartServer && hasOldBranch { // Check there is no way this can return multiple infos - attributesMap, err := attribute.CheckAttribute(ctx, t.gitRepo, "", attribute.CheckAttributeOpts{ + attributesMap, err := attribute.CheckAttributes(ctx, t.gitRepo, "", attribute.CheckAttributeOpts{ Attributes: []string{"filter"}, Filenames: []string{file.Options.treePath}, }) diff --git a/services/repository/files/upload.go b/services/repository/files/upload.go index f4f41cca69269..cd3bdf7e5b5e2 100644 --- a/services/repository/files/upload.go +++ b/services/repository/files/upload.go @@ -108,7 +108,7 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use var attributesMap map[string]attribute.Attributes if setting.LFS.StartServer { - attributesMap, err = attribute.CheckAttribute(ctx, t.gitRepo, "", attribute.CheckAttributeOpts{ + attributesMap, err = attribute.CheckAttributes(ctx, t.gitRepo, "", attribute.CheckAttributeOpts{ Attributes: []string{"filter"}, Filenames: names, }) From d17e7ad7697705ab703f5d6af7721d27decfd4ae Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 10 Apr 2025 19:25:51 -0700 Subject: [PATCH 14/23] Fix lint --- modules/git/attribute/batch_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/git/attribute/batch_test.go b/modules/git/attribute/batch_test.go index 4e990e389cf9f..ae75d132e0bbd 100644 --- a/modules/git/attribute/batch_test.go +++ b/modules/git/attribute/batch_test.go @@ -11,6 +11,7 @@ import ( "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/test" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) From a70ef5f701cb464e14779c133e80fa0211384d15 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 10 Apr 2025 20:24:52 -0700 Subject: [PATCH 15/23] Add trace code back --- modules/git/attribute/batch.go | 19 +++++++++---------- modules/git/command.go | 7 +++++++ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/modules/git/attribute/batch.go b/modules/git/attribute/batch.go index cb2366446bb1d..8faf96708ecf6 100644 --- a/modules/git/attribute/batch.go +++ b/modules/git/attribute/batch.go @@ -17,14 +17,13 @@ import ( // BatchChecker provides a reader for check-attribute content that can be long running type BatchChecker struct { - // params attributesNum int repo *git.Repository - - stdinWriter *os.File - stdOut *nulSeparatedAttributeWriter - ctx context.Context - cancel context.CancelFunc + stdinWriter *os.File + stdOut *nulSeparatedAttributeWriter + ctx context.Context + cancel context.CancelFunc + cmd *git.Command } // NewBatchChecker creates a check attribute reader for the current repository and provided commit ID @@ -44,6 +43,7 @@ func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) attributesNum: len(attributes), repo: repo, ctx: ctx, + cmd: cmd, cancel: func() { cancel() cleanup() @@ -113,10 +113,9 @@ func (c *BatchChecker) CheckPath(path string) (rs Attributes, err error) { } debugMsg := fmt.Sprintf("check path %q in repo %q", path, filepath.Base(c.repo.Path)) debugMsg += fmt.Sprintf(", stdOut: tmp=%q, pos=%d, closed=%v", string(c.stdOut.tmp), c.stdOut.pos, stdOutClosed) - // FIXME: - //if c.cmd.cmd != nil { - // debugMsg += fmt.Sprintf(", process state: %q", c.cmd.cmd.ProcessState.String()) - //} + if c.cmd != nil { + debugMsg += fmt.Sprintf(", process state: %q", c.cmd.ProcessState()) + } _ = c.Close() return fmt.Errorf("CheckPath timeout: %s", debugMsg) } diff --git a/modules/git/command.go b/modules/git/command.go index f449f1ff0e6a8..eaaa4969d0bb1 100644 --- a/modules/git/command.go +++ b/modules/git/command.go @@ -80,6 +80,13 @@ func (c *Command) LogString() string { return strings.Join(a, " ") } +func (c *Command) ProcessState() string { + if c.cmd == nil { + return "" + } + return c.cmd.ProcessState.String() +} + // NewCommand creates and returns a new Git Command based on given command and arguments. // Each argument should be safe to be trusted. User-provided arguments should be passed to AddDynamicArguments instead. func NewCommand(args ...internal.CmdArg) *Command { From ed37f3a6600e06d63f42d66a3f86c47ab8bd0acc Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 10 Apr 2025 20:49:57 -0700 Subject: [PATCH 16/23] Remove unnecessary code --- routers/web/repo/setting/lfs.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/routers/web/repo/setting/lfs.go b/routers/web/repo/setting/lfs.go index 052b448eb5a36..2e4c0249efd8d 100644 --- a/routers/web/repo/setting/lfs.go +++ b/routers/web/repo/setting/lfs.go @@ -135,13 +135,7 @@ func LFSLocks(ctx *context.Context) { } defer gitRepo.Close() - if err := gitRepo.ReadTreeToIndex(ctx.Repo.Repository.DefaultBranch); err != nil { - log.Error("Unable to read the default branch to the index: %s (%v)", ctx.Repo.Repository.DefaultBranch, err) - ctx.ServerError("LFSLocks", fmt.Errorf("unable to read the default branch to the index: %s (%w)", ctx.Repo.Repository.DefaultBranch, err)) - return - } - - checker, err := attribute.NewBatchChecker(gitRepo, "", "lockable") + checker, err := attribute.NewBatchChecker(gitRepo, ctx.Repo.Repository.DefaultBranch, "lockable") if err != nil { log.Error("Unable to check attributes in %s (%v)", tmpBasePath, err) ctx.ServerError("LFSLocks", err) From 9db67492f35f01e3e2267e19d7c441828745b2f5 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 10 Apr 2025 21:39:12 -0700 Subject: [PATCH 17/23] Some improvements --- modules/git/attribute/attribute.go | 18 ++++++++-------- modules/git/attribute/batch.go | 21 +++++++++---------- modules/git/attribute/checker.go | 18 ++-------------- modules/git/languagestats/language_stats.go | 2 +- .../languagestats/language_stats_nogogit.go | 17 ++++++--------- routers/web/repo/setting/lfs.go | 2 +- routers/web/repo/view_file.go | 4 ++-- services/gitdiff/gitdiff.go | 5 ++--- 8 files changed, 33 insertions(+), 54 deletions(-) diff --git a/modules/git/attribute/attribute.go b/modules/git/attribute/attribute.go index df03687ef1d93..5b5c0d38dce24 100644 --- a/modules/git/attribute/attribute.go +++ b/modules/git/attribute/attribute.go @@ -60,27 +60,27 @@ func (attrs Attributes) Get(name string) Attribute { return "" } -func (attrs Attributes) HasVendored() optional.Option[bool] { +func (attrs Attributes) GetVendored() optional.Option[bool] { return attrs.Get(LinguistVendored).ToBool() } -func (attrs Attributes) HasGenerated() optional.Option[bool] { +func (attrs Attributes) GetGenerated() optional.Option[bool] { return attrs.Get(LinguistGenerated).ToBool() } -func (attrs Attributes) HasDocumentation() optional.Option[bool] { +func (attrs Attributes) GetDocumentation() optional.Option[bool] { return attrs.Get(LinguistDocumentation).ToBool() } -func (attrs Attributes) HasDetectable() optional.Option[bool] { +func (attrs Attributes) GetDetectable() optional.Option[bool] { return attrs.Get(LinguistDetectable).ToBool() } -func (attrs Attributes) LinguistLanguage() optional.Option[string] { +func (attrs Attributes) GetLinguistLanguage() optional.Option[string] { return attrs.Get(LinguistLanguage).ToString() } -func (attrs Attributes) GitlabLanguage() optional.Option[string] { +func (attrs Attributes) GetGitlabLanguage() optional.Option[string] { attrStr := attrs.Get(GitlabLanguage).ToString() if attrStr.Has() { raw := attrStr.Value() @@ -94,13 +94,13 @@ func (attrs Attributes) GitlabLanguage() optional.Option[string] { return attrStr } -func (attrs Attributes) Language() optional.Option[string] { +func (attrs Attributes) GetLanguage() optional.Option[string] { // prefer linguist-language over gitlab-language // if linguist-language is not set, use gitlab-language // if both are not set, return none - language := attrs.LinguistLanguage() + language := attrs.GetLinguistLanguage() if language.Value() == "" { - language = attrs.GitlabLanguage() + language = attrs.GetGitlabLanguage() } return language } diff --git a/modules/git/attribute/batch.go b/modules/git/attribute/batch.go index 8faf96708ecf6..9ca37a5aa716e 100644 --- a/modules/git/attribute/batch.go +++ b/modules/git/attribute/batch.go @@ -27,19 +27,17 @@ type BatchChecker struct { } // NewBatchChecker creates a check attribute reader for the current repository and provided commit ID -func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) (*BatchChecker, error) { +func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) (checker *BatchChecker, returnedErr error) { ctx, cancel := context.WithCancel(repo.Ctx) - if len(attributes) == 0 { - attributes = LinguistAttributes - } cmd, envs, cleanup, err := checkAttrCommand(repo, treeish, nil, attributes) if err != nil { cancel() return nil, err } + cmd.AddArguments("--stdin") - checker := &BatchChecker{ + checker = &BatchChecker{ attributesNum: len(attributes), repo: repo, ctx: ctx, @@ -49,16 +47,20 @@ func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) cleanup() }, } + defer func() { + if returnedErr != nil { + checker.cancel() + } + }() stdinReader, stdinWriter, err := os.Pipe() if err != nil { - checker.cancel() return nil, err } checker.stdinWriter = stdinWriter lw := new(nulSeparatedAttributeWriter) - lw.attributes = make(chan attributeTriple, 5) + lw.attributes = make(chan attributeTriple, len(attributes)) lw.closed = make(chan struct{}) checker.stdOut = lw @@ -124,10 +126,7 @@ func (c *BatchChecker) CheckPath(path string) (rs Attributes, err error) { for i := 0; i < c.attributesNum; i++ { select { case <-time.After(5 * time.Second): - // There is a strange "hang" problem in gitdiff.GetDiff -> CheckPath - // So add a timeout here to mitigate the problem, and output more logs for debug purpose - // In real world, if CheckPath runs long than seconds, it blocks the end user's operation, - // and at the moment the CheckPath result is not so important, so we can just ignore it. + // there is no "hang" problem now. This code is just used to catch other potential problems. return nil, reportTimeout() case attr, ok := <-c.stdOut.ReadAttribute(): if !ok { diff --git a/modules/git/attribute/checker.go b/modules/git/attribute/checker.go index dc98ddd5a066b..385254f69dd1e 100644 --- a/modules/git/attribute/checker.go +++ b/modules/git/attribute/checker.go @@ -9,7 +9,6 @@ import ( "errors" "fmt" "os" - "strconv" "code.gitea.io/gitea/modules/git" ) @@ -17,25 +16,12 @@ import ( func checkAttrCommand(gitRepo *git.Repository, treeish string, filenames, attributes []string) (*git.Command, []string, func(), error) { cancel := func() {} envs := []string{"GIT_FLUSH=1"} - - // Check if the repository is bare - res, _, err := git.NewCommand("rev-parse", "--is-bare-repository").RunStdString(gitRepo.Ctx, &git.RunOpts{ - Dir: gitRepo.Path, - }) - if err != nil { - return nil, nil, nil, fmt.Errorf("failed to run rev-parse: %w", err) - } - isBare, _ := strconv.ParseBool(res) - // bare repository must have a treeish - if isBare && treeish == "" { - return nil, nil, nil, errors.New("bare repository must have a treeish") - } - cmd := git.NewCommand("check-attr", "-z") if len(attributes) == 0 { cmd.AddArguments("--all") } + // there is treeish, read from source or index if treeish != "" { if git.DefaultFeatures().SupportCheckAttrOnBare { cmd.AddArguments("--source") @@ -53,7 +39,7 @@ func checkAttrCommand(gitRepo *git.Repository, treeish string, filenames, attrib ) cancel = deleteTemporaryFile } - } + } // no treeish, read from working directory cmd.AddDynamicArguments(attributes...) if len(filenames) > 0 { diff --git a/modules/git/languagestats/language_stats.go b/modules/git/languagestats/language_stats.go index 3b336b6a1dff7..a71284c3e446f 100644 --- a/modules/git/languagestats/language_stats.go +++ b/modules/git/languagestats/language_stats.go @@ -61,5 +61,5 @@ func GetFileLanguage(ctx context.Context, gitRepo *git.Repository, treeish, tree return "", err } - return attributesMap[treePath].Language().Value(), nil + return attributesMap[treePath].GetLanguage().Value(), nil } diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index 035cd93b0af32..ff3d85ab3f020 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -64,7 +64,7 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, return nil, err } - checker, err := attribute.NewBatchChecker(repo, commitID) + checker, err := attribute.NewBatchChecker(repo, commitID, attribute.LinguistAttributes...) if err != nil { return nil, err } @@ -103,28 +103,23 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, attrs, err := checker.CheckPath(f.Name()) if err == nil { - isVendored = attrs.HasVendored() - if isVendored.ValueOrDefault(false) { + if isVendored = attrs.GetVendored(); isVendored.ValueOrDefault(false) { continue } - isGenerated = attrs.HasGenerated() - if isGenerated.ValueOrDefault(false) { + if isGenerated = attrs.GetGenerated(); isGenerated.ValueOrDefault(false) { continue } - isDocumentation = attrs.HasDocumentation() - if isDocumentation.ValueOrDefault(false) { + if isDocumentation = attrs.GetDocumentation(); isDocumentation.ValueOrDefault(false) { continue } - isDetectable = attrs.HasDetectable() - if !isDetectable.ValueOrDefault(true) { + if isDetectable = attrs.GetDetectable(); !isDetectable.ValueOrDefault(true) { continue } - hasLanguage := attrs.Language() - if hasLanguage.Value() != "" { + if hasLanguage := attrs.GetLanguage(); hasLanguage.Value() != "" { language := hasLanguage.Value() // group languages, such as Pug -> HTML; SCSS -> CSS diff --git a/routers/web/repo/setting/lfs.go b/routers/web/repo/setting/lfs.go index 2e4c0249efd8d..31dfa92e1c60f 100644 --- a/routers/web/repo/setting/lfs.go +++ b/routers/web/repo/setting/lfs.go @@ -152,7 +152,7 @@ func LFSLocks(ctx *context.Context) { log.Error("Unable to check attributes in %s: %s (%v)", tmpBasePath, lock.Path, err) continue } - lockables[i] = attrs["lockable"] == "set" + lockables[i] = attrs["lockable"].ToBool().Value() } ctx.Data["Lockables"] = lockables diff --git a/routers/web/repo/view_file.go b/routers/web/repo/view_file.go index 65f8afdcd6d83..b4d2bb0188982 100644 --- a/routers/web/repo/view_file.go +++ b/routers/web/repo/view_file.go @@ -294,8 +294,8 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { attrs, err := checker.CheckPath(ctx.Repo.TreePath) if err == nil { - ctx.Data["IsVendored"] = attrs.HasVendored().Value() - ctx.Data["IsGenerated"] = attrs.HasGenerated().Value() + ctx.Data["IsVendored"] = attrs.GetVendored().Value() + ctx.Data["IsGenerated"] = attrs.GetGenerated().Value() } } diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 46b525f090b39..84de5cd627627 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -1249,9 +1249,8 @@ func GetDiffForRender(ctx context.Context, gitRepo *git.Repository, opts *DiffOp isGenerated := optional.None[bool]() attrs, err := checker.CheckPath(diffFile.Name) if err == nil { - isVendored = attrs.HasVendored() - isGenerated = attrs.HasGenerated() - language := attrs.Language() + isVendored, isGenerated = attrs.GetVendored(), attrs.GetGenerated() + language := attrs.GetLanguage() if language.Has() { diffFile.Language = language.Value() } From d6f138b42b04d2b4aba22497eb0eafe16aa95650 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 11 Apr 2025 12:55:56 +0800 Subject: [PATCH 18/23] fine tune --- modules/git/attribute/attribute.go | 1 + modules/git/attribute/batch.go | 19 ++++++++++++------- modules/git/attribute/batch_test.go | 6 +++--- .../git/languagestats/language_stats_gogit.go | 12 ++++++------ .../languagestats/language_stats_nogogit.go | 2 +- routers/web/repo/setting/lfs.go | 2 +- routers/web/repo/view_file.go | 2 +- services/gitdiff/gitdiff.go | 2 +- services/repository/files/update.go | 2 +- services/repository/files/upload.go | 2 +- 10 files changed, 28 insertions(+), 22 deletions(-) diff --git a/modules/git/attribute/attribute.go b/modules/git/attribute/attribute.go index 5b5c0d38dce24..59c90af68438b 100644 --- a/modules/git/attribute/attribute.go +++ b/modules/git/attribute/attribute.go @@ -18,6 +18,7 @@ const ( LinguistDetectable = "linguist-detectable" LinguistLanguage = "linguist-language" GitlabLanguage = "gitlab-language" + Lockable = "lockable" ) var LinguistAttributes = []string{ diff --git a/modules/git/attribute/batch.go b/modules/git/attribute/batch.go index 9ca37a5aa716e..c02abdd326c74 100644 --- a/modules/git/attribute/batch.go +++ b/modules/git/attribute/batch.go @@ -27,13 +27,23 @@ type BatchChecker struct { } // NewBatchChecker creates a check attribute reader for the current repository and provided commit ID -func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) (checker *BatchChecker, returnedErr error) { +func NewBatchChecker(repo *git.Repository, treeish string, attributes []string) (checker *BatchChecker, returnedErr error) { ctx, cancel := context.WithCancel(repo.Ctx) + defer func() { + if returnedErr != nil { + cancel() + } + }() + cmd, envs, cleanup, err := checkAttrCommand(repo, treeish, nil, attributes) if err != nil { - cancel() return nil, err } + defer func() { + if returnedErr != nil { + cleanup() + } + }() cmd.AddArguments("--stdin") @@ -47,11 +57,6 @@ func NewBatchChecker(repo *git.Repository, treeish string, attributes ...string) cleanup() }, } - defer func() { - if returnedErr != nil { - checker.cancel() - } - }() stdinReader, stdinWriter, err := os.Pipe() if err != nil { diff --git a/modules/git/attribute/batch_test.go b/modules/git/attribute/batch_test.go index ae75d132e0bbd..08114df06ac9f 100644 --- a/modules/git/attribute/batch_test.go +++ b/modules/git/attribute/batch_test.go @@ -122,7 +122,7 @@ func Test_BatchChecker(t *testing.T) { t.Run("Create index file to run git check-attr", func(t *testing.T) { defer test.MockVariableValue(&git.DefaultFeatures().SupportCheckAttrOnBare, false)() - checker, err := NewBatchChecker(gitRepo, commitID, LinguistAttributes...) + checker, err := NewBatchChecker(gitRepo, commitID, LinguistAttributes) assert.NoError(t, err) defer checker.Close() attributes, err := checker.CheckPath("i-am-a-python.p") @@ -143,7 +143,7 @@ func Test_BatchChecker(t *testing.T) { assert.NoError(t, err) defer tempRepo.Close() - checker, err := NewBatchChecker(tempRepo, "", LinguistAttributes...) + checker, err := NewBatchChecker(tempRepo, "", LinguistAttributes) assert.NoError(t, err) defer checker.Close() attributes, err := checker.CheckPath("i-am-a-python.p") @@ -157,7 +157,7 @@ func Test_BatchChecker(t *testing.T) { } t.Run("Run git check-attr in bare repository", func(t *testing.T) { - checker, err := NewBatchChecker(gitRepo, commitID, LinguistAttributes...) + checker, err := NewBatchChecker(gitRepo, commitID, LinguistAttributes) assert.NoError(t, err) defer checker.Close() diff --git a/modules/git/languagestats/language_stats_gogit.go b/modules/git/languagestats/language_stats_gogit.go index 05a070efc0727..418c05b15789f 100644 --- a/modules/git/languagestats/language_stats_gogit.go +++ b/modules/git/languagestats/language_stats_gogit.go @@ -42,7 +42,7 @@ func GetLanguageStats(repo *git_module.Repository, commitID string) (map[string] return nil, err } - checker, err := attribute.NewBatchChecker(repo, commitID) + checker, err := attribute.NewBatchChecker(repo, commitID, attribute.LinguistAttributes) if err != nil { return nil, err } @@ -69,27 +69,27 @@ func GetLanguageStats(repo *git_module.Repository, commitID string) (map[string] attrs, err := checker.CheckPath(f.Name) if err == nil { - isVendored = attrs.HasVendored() + isVendored = attrs.GetVendored() if isVendored.ValueOrDefault(false) { return nil } - isGenerated = attrs.HasGenerated() + isGenerated = attrs.GetGenerated() if isGenerated.ValueOrDefault(false) { return nil } - isDocumentation = attrs.HasDocumentation() + isDocumentation = attrs.GetDocumentation() if isDocumentation.ValueOrDefault(false) { return nil } - isDetectable = attrs.HasDetectable() + isDetectable = attrs.GetDetectable() if !isDetectable.ValueOrDefault(true) { return nil } - hasLanguage := attrs.Language() + hasLanguage := attrs.GetLanguage() if hasLanguage.Value() != "" { language := hasLanguage.Value() diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index ff3d85ab3f020..34797263a603a 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -64,7 +64,7 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, return nil, err } - checker, err := attribute.NewBatchChecker(repo, commitID, attribute.LinguistAttributes...) + checker, err := attribute.NewBatchChecker(repo, commitID, attribute.LinguistAttributes) if err != nil { return nil, err } diff --git a/routers/web/repo/setting/lfs.go b/routers/web/repo/setting/lfs.go index 31dfa92e1c60f..018c59cf61d71 100644 --- a/routers/web/repo/setting/lfs.go +++ b/routers/web/repo/setting/lfs.go @@ -135,7 +135,7 @@ func LFSLocks(ctx *context.Context) { } defer gitRepo.Close() - checker, err := attribute.NewBatchChecker(gitRepo, ctx.Repo.Repository.DefaultBranch, "lockable") + checker, err := attribute.NewBatchChecker(gitRepo, ctx.Repo.Repository.DefaultBranch, []string{attribute.Lockable}) if err != nil { log.Error("Unable to check attributes in %s (%v)", tmpBasePath, err) ctx.ServerError("LFSLocks", err) diff --git a/routers/web/repo/view_file.go b/routers/web/repo/view_file.go index b4d2bb0188982..1fbaa5ab99ef0 100644 --- a/routers/web/repo/view_file.go +++ b/routers/web/repo/view_file.go @@ -285,7 +285,7 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { } if ctx.Repo.GitRepo != nil { - checker, err := attribute.NewBatchChecker(ctx.Repo.GitRepo, ctx.Repo.CommitID, attribute.LinguistGenerated, attribute.LinguistVendored) + checker, err := attribute.NewBatchChecker(ctx.Repo.GitRepo, ctx.Repo.CommitID, []string{attribute.LinguistGenerated, attribute.LinguistVendored}) if err != nil { ctx.ServerError("NewAttributeChecker", err) return diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 84de5cd627627..9ee86d9dfc0f8 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -1238,7 +1238,7 @@ func GetDiffForRender(ctx context.Context, gitRepo *git.Repository, opts *DiffOp return nil, err } - checker, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID, attribute.LinguistVendored, attribute.LinguistGenerated, attribute.LinguistLanguage, attribute.GitlabLanguage) + checker, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID, []string{attribute.LinguistVendored, attribute.LinguistGenerated, attribute.LinguistLanguage, attribute.GitlabLanguage}) if err != nil { return nil, err } diff --git a/services/repository/files/update.go b/services/repository/files/update.go index 39d82542e33f2..3ae7ad8f3716a 100644 --- a/services/repository/files/update.go +++ b/services/repository/files/update.go @@ -489,7 +489,7 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file var lfsMetaObject *git_model.LFSMetaObject if setting.LFS.StartServer && hasOldBranch { // Check there is no way this can return multiple infos - attributesMap, err := attribute.CheckAttributes(ctx, t.gitRepo, "", attribute.CheckAttributeOpts{ + attributesMap, err := attribute.CheckAttributes(ctx, t.gitRepo, "" /* use temp repo's working dir */, attribute.CheckAttributeOpts{ Attributes: []string{"filter"}, Filenames: []string{file.Options.treePath}, }) diff --git a/services/repository/files/upload.go b/services/repository/files/upload.go index cd3bdf7e5b5e2..87062b55b8e32 100644 --- a/services/repository/files/upload.go +++ b/services/repository/files/upload.go @@ -108,7 +108,7 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use var attributesMap map[string]attribute.Attributes if setting.LFS.StartServer { - attributesMap, err = attribute.CheckAttributes(ctx, t.gitRepo, "", attribute.CheckAttributeOpts{ + attributesMap, err = attribute.CheckAttributes(ctx, t.gitRepo, "" /* use temp repo's working dir */, attribute.CheckAttributeOpts{ Attributes: []string{"filter"}, Filenames: names, }) From 2391070f1ef998fc411534f102513846eaf0d3eb Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 11 Apr 2025 13:01:28 +0800 Subject: [PATCH 19/23] add comment --- modules/git/attribute/batch.go | 1 + modules/git/attribute/checker.go | 1 + 2 files changed, 2 insertions(+) diff --git a/modules/git/attribute/batch.go b/modules/git/attribute/batch.go index c02abdd326c74..e0e6ba0188216 100644 --- a/modules/git/attribute/batch.go +++ b/modules/git/attribute/batch.go @@ -27,6 +27,7 @@ type BatchChecker struct { } // NewBatchChecker creates a check attribute reader for the current repository and provided commit ID +// If treeish is empty, then it will use current working directory, otherwise it will use the provided treeish on the bare repo func NewBatchChecker(repo *git.Repository, treeish string, attributes []string) (checker *BatchChecker, returnedErr error) { ctx, cancel := context.WithCancel(repo.Ctx) defer func() { diff --git a/modules/git/attribute/checker.go b/modules/git/attribute/checker.go index 385254f69dd1e..c5fdb627f946b 100644 --- a/modules/git/attribute/checker.go +++ b/modules/git/attribute/checker.go @@ -54,6 +54,7 @@ type CheckAttributeOpts struct { } // CheckAttributes return the attributes of the given filenames and attributes in the given treeish. +// If treeish is empty, then it will use current working directory, otherwise it will use the provided treeish on the bare repo func CheckAttributes(ctx context.Context, gitRepo *git.Repository, treeish string, opts CheckAttributeOpts) (map[string]Attributes, error) { cmd, envs, cancel, err := checkAttrCommand(gitRepo, treeish, opts.Filenames, opts.Attributes) if err != nil { From 1a522442d9e42491f4908ab640cd82431f35fca6 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 10 Apr 2025 22:53:11 -0700 Subject: [PATCH 20/23] merge two functions call as one --- routers/web/repo/view_file.go | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/routers/web/repo/view_file.go b/routers/web/repo/view_file.go index b4d2bb0188982..187b451559088 100644 --- a/routers/web/repo/view_file.go +++ b/routers/web/repo/view_file.go @@ -19,7 +19,6 @@ import ( "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/attribute" - "code.gitea.io/gitea/modules/git/languagestats" "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup" @@ -148,6 +147,9 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { ctx.Data["EditFileTooltip"] = ctx.Tr("repo.editor.cannot_edit_non_text_files") } + var attrs attribute.Attributes + attributes := []string{attribute.LinguistGenerated, attribute.LinguistVendored} + switch { case isRepresentableAsText: if fInfo.fileSize >= setting.UI.MaxDisplayFileSize { @@ -210,9 +212,17 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { ctx.Data["NumLines"] = bytes.Count(buf, []byte{'\n'}) + 1 } - language, err := languagestats.GetFileLanguage(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, ctx.Repo.TreePath) + var language string + attributes = append(attributes, attribute.LinguistLanguage, attribute.GitlabLanguage) + attrsMap, err := attribute.CheckAttributes(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, attribute.CheckAttributeOpts{ + Filenames: []string{ctx.Repo.TreePath}, + Attributes: attributes, + }) if err != nil { log.Error("Unable to get file language for %-v:%s. Error: %v", ctx.Repo.Repository, ctx.Repo.TreePath, err) + } else { + attrs = attrsMap[ctx.Repo.TreePath] // then it will be reused out of the switch block + language = attrs.GetLanguage().Value() } fileContent, lexerName, err := highlight.File(blob.Name(), language, buf) @@ -284,21 +294,20 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { } } - if ctx.Repo.GitRepo != nil { - checker, err := attribute.NewBatchChecker(ctx.Repo.GitRepo, ctx.Repo.CommitID, attribute.LinguistGenerated, attribute.LinguistVendored) + if attrs == nil { + attrsMap, err := attribute.CheckAttributes(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, attribute.CheckAttributeOpts{ + Filenames: []string{ctx.Repo.TreePath}, + Attributes: attributes, + }) if err != nil { - ctx.ServerError("NewAttributeChecker", err) + ctx.ServerError("attribute.CheckAttributes", err) return } - defer checker.Close() - - attrs, err := checker.CheckPath(ctx.Repo.TreePath) - if err == nil { - ctx.Data["IsVendored"] = attrs.GetVendored().Value() - ctx.Data["IsGenerated"] = attrs.GetGenerated().Value() - } + attrs = attrsMap[ctx.Repo.TreePath] } + ctx.Data["IsVendored"], ctx.Data["IsGenerated"] = attrs.GetVendored().Value(), attrs.GetGenerated().Value() + if fInfo.st.IsImage() && !fInfo.st.IsSvgImage() { img, _, err := image.DecodeConfig(bytes.NewReader(buf)) if err == nil { From 1d333296eb7c809eecf55b1c42dce586a044a55b Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 11 Apr 2025 15:10:05 +0800 Subject: [PATCH 21/23] don't make Attributes expose internal map --- modules/git/attribute/attribute.go | 29 +++++++++------- modules/git/attribute/attribute_test.go | 16 +++++---- modules/git/attribute/batch.go | 6 ++-- modules/git/attribute/checker.go | 14 ++++---- routers/web/repo/setting/lfs.go | 2 +- routers/web/repo/view_file.go | 44 ++++++++++--------------- services/repository/files/update.go | 4 +-- services/repository/files/upload.go | 8 ++--- 8 files changed, 61 insertions(+), 62 deletions(-) diff --git a/modules/git/attribute/attribute.go b/modules/git/attribute/attribute.go index 59c90af68438b..adf323ef41c05 100644 --- a/modules/git/attribute/attribute.go +++ b/modules/git/attribute/attribute.go @@ -19,6 +19,7 @@ const ( LinguistLanguage = "linguist-language" GitlabLanguage = "gitlab-language" Lockable = "lockable" + Filter = "filter" ) var LinguistAttributes = []string{ @@ -41,7 +42,7 @@ func (a Attribute) ToString() optional.Option[string] { return optional.None[string]() } -// true if "set"/"true", false if "unset"/"false", none otherwise +// ToBool converts the attribute value to optional boolean: true if "set"/"true", false if "unset"/"false", none otherwise func (a Attribute) ToBool() optional.Option[bool] { switch a { case "set", "true": @@ -52,36 +53,42 @@ func (a Attribute) ToBool() optional.Option[bool] { return optional.None[bool]() } -type Attributes map[string]Attribute +type Attributes struct { + m map[string]Attribute +} + +func NewAttributes() *Attributes { + return &Attributes{m: make(map[string]Attribute)} +} -func (attrs Attributes) Get(name string) Attribute { - if value, has := attrs[name]; has { +func (attrs *Attributes) Get(name string) Attribute { + if value, has := attrs.m[name]; has { return value } return "" } -func (attrs Attributes) GetVendored() optional.Option[bool] { +func (attrs *Attributes) GetVendored() optional.Option[bool] { return attrs.Get(LinguistVendored).ToBool() } -func (attrs Attributes) GetGenerated() optional.Option[bool] { +func (attrs *Attributes) GetGenerated() optional.Option[bool] { return attrs.Get(LinguistGenerated).ToBool() } -func (attrs Attributes) GetDocumentation() optional.Option[bool] { +func (attrs *Attributes) GetDocumentation() optional.Option[bool] { return attrs.Get(LinguistDocumentation).ToBool() } -func (attrs Attributes) GetDetectable() optional.Option[bool] { +func (attrs *Attributes) GetDetectable() optional.Option[bool] { return attrs.Get(LinguistDetectable).ToBool() } -func (attrs Attributes) GetLinguistLanguage() optional.Option[string] { +func (attrs *Attributes) GetLinguistLanguage() optional.Option[string] { return attrs.Get(LinguistLanguage).ToString() } -func (attrs Attributes) GetGitlabLanguage() optional.Option[string] { +func (attrs *Attributes) GetGitlabLanguage() optional.Option[string] { attrStr := attrs.Get(GitlabLanguage).ToString() if attrStr.Has() { raw := attrStr.Value() @@ -95,7 +102,7 @@ func (attrs Attributes) GetGitlabLanguage() optional.Option[string] { return attrStr } -func (attrs Attributes) GetLanguage() optional.Option[string] { +func (attrs *Attributes) GetLanguage() optional.Option[string] { // prefer linguist-language over gitlab-language // if linguist-language is not set, use gitlab-language // if both are not set, return none diff --git a/modules/git/attribute/attribute_test.go b/modules/git/attribute/attribute_test.go index bf5e2b70ed40e..dadb5582a3cb5 100644 --- a/modules/git/attribute/attribute_test.go +++ b/modules/git/attribute/attribute_test.go @@ -16,13 +16,15 @@ func Test_Attribute(t *testing.T) { assert.Equal(t, "Java", Attribute("Java").ToString().Value()) attributes := Attributes{ - LinguistGenerated: "true", - LinguistDocumentation: "false", - LinguistDetectable: "set", - LinguistLanguage: "Python", - GitlabLanguage: "Java", - "filter": "unspecified", - "test": "", + m: map[string]Attribute{ + LinguistGenerated: "true", + LinguistDocumentation: "false", + LinguistDetectable: "set", + LinguistLanguage: "Python", + GitlabLanguage: "Java", + "filter": "unspecified", + "test": "", + }, } assert.Empty(t, attributes.Get("test").ToString().Value()) diff --git a/modules/git/attribute/batch.go b/modules/git/attribute/batch.go index e0e6ba0188216..4e31fda5753cd 100644 --- a/modules/git/attribute/batch.go +++ b/modules/git/attribute/batch.go @@ -94,7 +94,7 @@ func NewBatchChecker(repo *git.Repository, treeish string, attributes []string) } // CheckPath check attr for given path -func (c *BatchChecker) CheckPath(path string) (rs Attributes, err error) { +func (c *BatchChecker) CheckPath(path string) (rs *Attributes, err error) { defer func() { if err != nil && err != c.ctx.Err() { log.Error("Unexpected error when checking path %s in %s, error: %v", path, filepath.Base(c.repo.Path), err) @@ -128,7 +128,7 @@ func (c *BatchChecker) CheckPath(path string) (rs Attributes, err error) { return fmt.Errorf("CheckPath timeout: %s", debugMsg) } - rs = make(map[string]Attribute) + rs = NewAttributes() for i := 0; i < c.attributesNum; i++ { select { case <-time.After(5 * time.Second): @@ -138,7 +138,7 @@ func (c *BatchChecker) CheckPath(path string) (rs Attributes, err error) { if !ok { return nil, c.ctx.Err() } - rs[attr.Attribute] = Attribute(attr.Value) + rs.m[attr.Attribute] = Attribute(attr.Value) case <-c.ctx.Done(): return nil, c.ctx.Err() } diff --git a/modules/git/attribute/checker.go b/modules/git/attribute/checker.go index c5fdb627f946b..0ecb6976ff713 100644 --- a/modules/git/attribute/checker.go +++ b/modules/git/attribute/checker.go @@ -55,7 +55,7 @@ type CheckAttributeOpts struct { // CheckAttributes return the attributes of the given filenames and attributes in the given treeish. // If treeish is empty, then it will use current working directory, otherwise it will use the provided treeish on the bare repo -func CheckAttributes(ctx context.Context, gitRepo *git.Repository, treeish string, opts CheckAttributeOpts) (map[string]Attributes, error) { +func CheckAttributes(ctx context.Context, gitRepo *git.Repository, treeish string, opts CheckAttributeOpts) (map[string]*Attributes, error) { cmd, envs, cancel, err := checkAttrCommand(gitRepo, treeish, opts.Filenames, opts.Attributes) if err != nil { return nil, err @@ -79,17 +79,17 @@ func CheckAttributes(ctx context.Context, gitRepo *git.Repository, treeish strin return nil, errors.New("wrong number of fields in return from check-attr") } - attributesMap := make(map[string]Attributes) + attributesMap := make(map[string]*Attributes) for i := 0; i < (len(fields) / 3); i++ { filename := string(fields[3*i]) attribute := string(fields[3*i+1]) info := string(fields[3*i+2]) - attribute2info := attributesMap[filename] - if attribute2info == nil { - attribute2info = make(Attributes) + attribute2info, ok := attributesMap[filename] + if !ok { + attribute2info = NewAttributes() + attributesMap[filename] = attribute2info } - attribute2info[attribute] = Attribute(info) - attributesMap[filename] = attribute2info + attribute2info.m[attribute] = Attribute(info) } return attributesMap, nil diff --git a/routers/web/repo/setting/lfs.go b/routers/web/repo/setting/lfs.go index 018c59cf61d71..a065620b2b249 100644 --- a/routers/web/repo/setting/lfs.go +++ b/routers/web/repo/setting/lfs.go @@ -152,7 +152,7 @@ func LFSLocks(ctx *context.Context) { log.Error("Unable to check attributes in %s: %s (%v)", tmpBasePath, lock.Path, err) continue } - lockables[i] = attrs["lockable"].ToBool().Value() + lockables[i] = attrs.Get(attribute.Lockable).ToBool().Value() } ctx.Data["Lockables"] = lockables diff --git a/routers/web/repo/view_file.go b/routers/web/repo/view_file.go index 187b451559088..8ce3724f6233e 100644 --- a/routers/web/repo/view_file.go +++ b/routers/web/repo/view_file.go @@ -147,8 +147,22 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { ctx.Data["EditFileTooltip"] = ctx.Tr("repo.editor.cannot_edit_non_text_files") } - var attrs attribute.Attributes - attributes := []string{attribute.LinguistGenerated, attribute.LinguistVendored} + // read all needed attributes which will be used later + // there should be no performance different between reading 2 or 4 here + attrsMap, err := attribute.CheckAttributes(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, attribute.CheckAttributeOpts{ + Filenames: []string{ctx.Repo.TreePath}, + Attributes: []string{attribute.LinguistGenerated, attribute.LinguistVendored, attribute.LinguistLanguage, attribute.GitlabLanguage}, + }) + if err != nil { + ctx.ServerError("attribute.CheckAttributes", err) + return + } + attrs := attrsMap[ctx.Repo.TreePath] + if attrs == nil { + // this case shouldn't happe, just in case. + setting.PanicInDevOrTesting("no attributes found for %s", ctx.Repo.TreePath) + attrs = attribute.NewAttributes() + } switch { case isRepresentableAsText: @@ -212,19 +226,7 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { ctx.Data["NumLines"] = bytes.Count(buf, []byte{'\n'}) + 1 } - var language string - attributes = append(attributes, attribute.LinguistLanguage, attribute.GitlabLanguage) - attrsMap, err := attribute.CheckAttributes(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, attribute.CheckAttributeOpts{ - Filenames: []string{ctx.Repo.TreePath}, - Attributes: attributes, - }) - if err != nil { - log.Error("Unable to get file language for %-v:%s. Error: %v", ctx.Repo.Repository, ctx.Repo.TreePath, err) - } else { - attrs = attrsMap[ctx.Repo.TreePath] // then it will be reused out of the switch block - language = attrs.GetLanguage().Value() - } - + language := attrs.GetLanguage().Value() fileContent, lexerName, err := highlight.File(blob.Name(), language, buf) ctx.Data["LexerName"] = lexerName if err != nil { @@ -294,18 +296,6 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { } } - if attrs == nil { - attrsMap, err := attribute.CheckAttributes(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, attribute.CheckAttributeOpts{ - Filenames: []string{ctx.Repo.TreePath}, - Attributes: attributes, - }) - if err != nil { - ctx.ServerError("attribute.CheckAttributes", err) - return - } - attrs = attrsMap[ctx.Repo.TreePath] - } - ctx.Data["IsVendored"], ctx.Data["IsGenerated"] = attrs.GetVendored().Value(), attrs.GetGenerated().Value() if fInfo.st.IsImage() && !fInfo.st.IsSvgImage() { diff --git a/services/repository/files/update.go b/services/repository/files/update.go index 3ae7ad8f3716a..75ede4976f9a2 100644 --- a/services/repository/files/update.go +++ b/services/repository/files/update.go @@ -490,14 +490,14 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file if setting.LFS.StartServer && hasOldBranch { // Check there is no way this can return multiple infos attributesMap, err := attribute.CheckAttributes(ctx, t.gitRepo, "" /* use temp repo's working dir */, attribute.CheckAttributeOpts{ - Attributes: []string{"filter"}, + Attributes: []string{attribute.Filter}, Filenames: []string{file.Options.treePath}, }) if err != nil { return err } - if attributesMap[file.Options.treePath] != nil && attributesMap[file.Options.treePath]["filter"] == "lfs" { + if attributesMap[file.Options.treePath] != nil && attributesMap[file.Options.treePath].Get(attribute.Filter).ToString().Value() == "lfs" { // OK so we are supposed to LFS this data! pointer, err := lfs.GeneratePointer(treeObjectContentReader) if err != nil { diff --git a/services/repository/files/upload.go b/services/repository/files/upload.go index 87062b55b8e32..f348cb68ab543 100644 --- a/services/repository/files/upload.go +++ b/services/repository/files/upload.go @@ -106,10 +106,10 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use } } - var attributesMap map[string]attribute.Attributes + var attributesMap map[string]*attribute.Attributes if setting.LFS.StartServer { attributesMap, err = attribute.CheckAttributes(ctx, t.gitRepo, "" /* use temp repo's working dir */, attribute.CheckAttributeOpts{ - Attributes: []string{"filter"}, + Attributes: []string{attribute.Filter}, Filenames: names, }) if err != nil { @@ -176,7 +176,7 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use return repo_model.DeleteUploads(ctx, uploads...) } -func copyUploadedLFSFileIntoRepository(ctx context.Context, info *uploadInfo, attributesMap map[string]attribute.Attributes, t *TemporaryUploadRepository, treePath string) error { +func copyUploadedLFSFileIntoRepository(ctx context.Context, info *uploadInfo, attributesMap map[string]*attribute.Attributes, t *TemporaryUploadRepository, treePath string) error { file, err := os.Open(info.upload.LocalPath()) if err != nil { return err @@ -184,7 +184,7 @@ func copyUploadedLFSFileIntoRepository(ctx context.Context, info *uploadInfo, at defer file.Close() var objectHash string - if setting.LFS.StartServer && attributesMap[info.upload.Name] != nil && attributesMap[info.upload.Name]["filter"] == "lfs" { + if setting.LFS.StartServer && attributesMap[info.upload.Name] != nil && attributesMap[info.upload.Name].Get(attribute.Filter).ToString().Value() == "lfs" { // Handle LFS // FIXME: Inefficient! this should probably happen in models.Upload pointer, err := lfs.GeneratePointer(file) From a0cfb36717ccd165e435d2d182152bc62094a89d Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 11 Apr 2025 15:19:38 +0800 Subject: [PATCH 22/23] improve test and comment --- modules/git/attribute/batch_test.go | 24 ++++++++++++++---------- modules/git/attribute/checker.go | 4 ++-- modules/git/attribute/checker_test.go | 6 +++--- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/modules/git/attribute/batch_test.go b/modules/git/attribute/batch_test.go index 08114df06ac9f..30a3d805fe983 100644 --- a/modules/git/attribute/batch_test.go +++ b/modules/git/attribute/batch_test.go @@ -102,13 +102,17 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { }, attr) } -var expectedAttrs = Attributes{ - LinguistGenerated: "unspecified", - LinguistDetectable: "unspecified", - LinguistDocumentation: "unspecified", - LinguistVendored: "unspecified", - LinguistLanguage: "Python", - GitlabLanguage: "unspecified", +func expectedAttrs() *Attributes { + return &Attributes{ + m: map[string]Attribute{ + LinguistGenerated: "unspecified", + LinguistDetectable: "unspecified", + LinguistDocumentation: "unspecified", + LinguistVendored: "unspecified", + LinguistLanguage: "Python", + GitlabLanguage: "unspecified", + }, + } } func Test_BatchChecker(t *testing.T) { @@ -127,7 +131,7 @@ func Test_BatchChecker(t *testing.T) { defer checker.Close() attributes, err := checker.CheckPath("i-am-a-python.p") assert.NoError(t, err) - assert.Equal(t, expectedAttrs, attributes) + assert.Equal(t, expectedAttrs(), attributes) }) // run git check-attr on work tree @@ -148,7 +152,7 @@ func Test_BatchChecker(t *testing.T) { defer checker.Close() attributes, err := checker.CheckPath("i-am-a-python.p") assert.NoError(t, err) - assert.Equal(t, expectedAttrs, attributes) + assert.Equal(t, expectedAttrs(), attributes) }) if !git.DefaultFeatures().SupportCheckAttrOnBare { @@ -163,6 +167,6 @@ func Test_BatchChecker(t *testing.T) { attributes, err := checker.CheckPath("i-am-a-python.p") assert.NoError(t, err) - assert.Equal(t, expectedAttrs, attributes) + assert.Equal(t, expectedAttrs(), attributes) }) } diff --git a/modules/git/attribute/checker.go b/modules/git/attribute/checker.go index 0ecb6976ff713..c17006a15407b 100644 --- a/modules/git/attribute/checker.go +++ b/modules/git/attribute/checker.go @@ -21,7 +21,7 @@ func checkAttrCommand(gitRepo *git.Repository, treeish string, filenames, attrib cmd.AddArguments("--all") } - // there is treeish, read from source or index + // there is treeish, read from bare repo or temp index created by "read-tree" if treeish != "" { if git.DefaultFeatures().SupportCheckAttrOnBare { cmd.AddArguments("--source") @@ -39,7 +39,7 @@ func checkAttrCommand(gitRepo *git.Repository, treeish string, filenames, attrib ) cancel = deleteTemporaryFile } - } // no treeish, read from working directory + } // else: no treeish, assume it is a not a bare repo, read from working directory cmd.AddDynamicArguments(attributes...) if len(filenames) > 0 { diff --git a/modules/git/attribute/checker_test.go b/modules/git/attribute/checker_test.go index 661b87c2b27da..97db43460bb81 100644 --- a/modules/git/attribute/checker_test.go +++ b/modules/git/attribute/checker_test.go @@ -32,7 +32,7 @@ func Test_Checker(t *testing.T) { }) assert.NoError(t, err) assert.Len(t, attrs, 1) - assert.Equal(t, expectedAttrs, attrs["i-am-a-python.p"]) + assert.Equal(t, expectedAttrs(), attrs["i-am-a-python.p"]) }) // run git check-attr on work tree @@ -54,7 +54,7 @@ func Test_Checker(t *testing.T) { }) assert.NoError(t, err) assert.Len(t, attrs, 1) - assert.Equal(t, expectedAttrs, attrs["i-am-a-python.p"]) + assert.Equal(t, expectedAttrs(), attrs["i-am-a-python.p"]) }) if !git.DefaultFeatures().SupportCheckAttrOnBare { @@ -69,6 +69,6 @@ func Test_Checker(t *testing.T) { }) assert.NoError(t, err) assert.Len(t, attrs, 1) - assert.Equal(t, expectedAttrs, attrs["i-am-a-python.p"]) + assert.Equal(t, expectedAttrs(), attrs["i-am-a-python.p"]) }) } From fca3bcf0d9a6adc37d553530bea8a3b0b278b5bf Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 11 Apr 2025 20:13:33 +0800 Subject: [PATCH 23/23] Update routers/web/repo/view_file.go Co-authored-by: yp05327 <576951401@qq.com> --- routers/web/repo/view_file.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/routers/web/repo/view_file.go b/routers/web/repo/view_file.go index 8ce3724f6233e..ff0e1b4d54c72 100644 --- a/routers/web/repo/view_file.go +++ b/routers/web/repo/view_file.go @@ -159,7 +159,7 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { } attrs := attrsMap[ctx.Repo.TreePath] if attrs == nil { - // this case shouldn't happe, just in case. + // this case shouldn't happen, just in case. setting.PanicInDevOrTesting("no attributes found for %s", ctx.Repo.TreePath) attrs = attribute.NewAttributes() }