From 1006e6e13c73ad3d9e2d5682ad266b5016523485 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 16 Jan 2024 22:30:18 +0800 Subject: [PATCH 1/7] Don't remove all mirror repository's releases when mirroring --- cmd/admin.go | 4 +- modules/repository/repo.go | 367 ---------------------- services/migrations/gitea_uploader.go | 5 +- services/mirror/mirror_pull.go | 3 +- services/repository/adopt.go | 2 +- services/repository/fork.go | 2 +- services/repository/tags.go | 431 ++++++++++++++++++++++++++ tests/integration/mirror_pull_test.go | 3 +- 8 files changed, 440 insertions(+), 377 deletions(-) create mode 100644 services/repository/tags.go diff --git a/cmd/admin.go b/cmd/admin.go index 74bfa5a6c6703..a5233750e5101 100644 --- a/cmd/admin.go +++ b/cmd/admin.go @@ -12,7 +12,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" - repo_module "code.gitea.io/gitea/modules/repository" + repo_service "code.gitea.io/gitea/services/repository" "github.com/urfave/cli/v2" ) @@ -134,7 +134,7 @@ func runRepoSyncReleases(_ *cli.Context) error { } log.Trace(" currentNumReleases is %d, running SyncReleasesWithTags", oldnum) - if err = repo_module.SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { + if err = repo_service.SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { log.Warn(" SyncReleasesWithTags: %v", err) gitRepo.Close() continue diff --git a/modules/repository/repo.go b/modules/repository/repo.go index 33363e4689bd1..fc33b502856f3 100644 --- a/modules/repository/repo.go +++ b/modules/repository/repo.go @@ -5,26 +5,16 @@ package repository import ( "context" - "errors" "fmt" "io" - "net/http" "strings" - "time" - "code.gitea.io/gitea/models/db" git_model "code.gitea.io/gitea/models/git" - "code.gitea.io/gitea/models/organization" repo_model "code.gitea.io/gitea/models/repo" - user_model "code.gitea.io/gitea/models/user" - "code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/migration" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/timeutil" - "code.gitea.io/gitea/modules/util" ) /* @@ -46,204 +36,6 @@ func WikiRemoteURL(ctx context.Context, remote string) string { return "" } -// MigrateRepositoryGitData starts migrating git related data after created migrating repository -func MigrateRepositoryGitData(ctx context.Context, u *user_model.User, - repo *repo_model.Repository, opts migration.MigrateOptions, - httpTransport *http.Transport, -) (*repo_model.Repository, error) { - repoPath := repo_model.RepoPath(u.Name, opts.RepoName) - - if u.IsOrganization() { - t, err := organization.OrgFromUser(u).GetOwnerTeam(ctx) - if err != nil { - return nil, err - } - repo.NumWatches = t.NumMembers - } else { - repo.NumWatches = 1 - } - - migrateTimeout := time.Duration(setting.Git.Timeout.Migrate) * time.Second - - var err error - if err = util.RemoveAll(repoPath); err != nil { - return repo, fmt.Errorf("Failed to remove %s: %w", repoPath, err) - } - - if err = git.Clone(ctx, opts.CloneAddr, repoPath, git.CloneRepoOptions{ - Mirror: true, - Quiet: true, - Timeout: migrateTimeout, - SkipTLSVerify: setting.Migrations.SkipTLSVerify, - }); err != nil { - if errors.Is(err, context.DeadlineExceeded) { - return repo, fmt.Errorf("Clone timed out. Consider increasing [git.timeout] MIGRATE in app.ini. Underlying Error: %w", err) - } - return repo, fmt.Errorf("Clone: %w", err) - } - - if err := git.WriteCommitGraph(ctx, repoPath); err != nil { - return repo, err - } - - if opts.Wiki { - wikiPath := repo_model.WikiPath(u.Name, opts.RepoName) - wikiRemotePath := WikiRemoteURL(ctx, opts.CloneAddr) - if len(wikiRemotePath) > 0 { - if err := util.RemoveAll(wikiPath); err != nil { - return repo, fmt.Errorf("Failed to remove %s: %w", wikiPath, err) - } - - if err := git.Clone(ctx, wikiRemotePath, wikiPath, git.CloneRepoOptions{ - Mirror: true, - Quiet: true, - Timeout: migrateTimeout, - Branch: "master", - SkipTLSVerify: setting.Migrations.SkipTLSVerify, - }); err != nil { - log.Warn("Clone wiki: %v", err) - if err := util.RemoveAll(wikiPath); err != nil { - return repo, fmt.Errorf("Failed to remove %s: %w", wikiPath, err) - } - } else { - if err := git.WriteCommitGraph(ctx, wikiPath); err != nil { - return repo, err - } - } - } - } - - if repo.OwnerID == u.ID { - repo.Owner = u - } - - if err = CheckDaemonExportOK(ctx, repo); err != nil { - return repo, fmt.Errorf("checkDaemonExportOK: %w", err) - } - - if stdout, _, err := git.NewCommand(ctx, "update-server-info"). - SetDescription(fmt.Sprintf("MigrateRepositoryGitData(git update-server-info): %s", repoPath)). - RunStdString(&git.RunOpts{Dir: repoPath}); err != nil { - log.Error("MigrateRepositoryGitData(git update-server-info) in %v: Stdout: %s\nError: %v", repo, stdout, err) - return repo, fmt.Errorf("error in MigrateRepositoryGitData(git update-server-info): %w", err) - } - - gitRepo, err := git.OpenRepository(ctx, repoPath) - if err != nil { - return repo, fmt.Errorf("OpenRepository: %w", err) - } - defer gitRepo.Close() - - repo.IsEmpty, err = gitRepo.IsEmpty() - if err != nil { - return repo, fmt.Errorf("git.IsEmpty: %w", err) - } - - if !repo.IsEmpty { - if len(repo.DefaultBranch) == 0 { - // Try to get HEAD branch and set it as default branch. - headBranch, err := gitRepo.GetHEADBranch() - if err != nil { - return repo, fmt.Errorf("GetHEADBranch: %w", err) - } - if headBranch != nil { - repo.DefaultBranch = headBranch.Name - } - } - - if _, err := SyncRepoBranchesWithRepo(ctx, repo, gitRepo, u.ID); err != nil { - return repo, fmt.Errorf("SyncRepoBranchesWithRepo: %v", err) - } - - if !opts.Releases { - // note: this will greatly improve release (tag) sync - // for pull-mirrors with many tags - repo.IsMirror = opts.Mirror - if err = SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { - log.Error("Failed to synchronize tags to releases for repository: %v", err) - } - } - - if opts.LFS { - endpoint := lfs.DetermineEndpoint(opts.CloneAddr, opts.LFSEndpoint) - lfsClient := lfs.NewClient(endpoint, httpTransport) - if err = StoreMissingLfsObjectsInRepository(ctx, repo, gitRepo, lfsClient); err != nil { - log.Error("Failed to store missing LFS objects for repository: %v", err) - } - } - } - - ctx, committer, err := db.TxContext(ctx) - if err != nil { - return nil, err - } - defer committer.Close() - - if opts.Mirror { - remoteAddress, err := util.SanitizeURL(opts.CloneAddr) - if err != nil { - return repo, err - } - mirrorModel := repo_model.Mirror{ - RepoID: repo.ID, - Interval: setting.Mirror.DefaultInterval, - EnablePrune: true, - NextUpdateUnix: timeutil.TimeStampNow().AddDuration(setting.Mirror.DefaultInterval), - LFS: opts.LFS, - RemoteAddress: remoteAddress, - } - if opts.LFS { - mirrorModel.LFSEndpoint = opts.LFSEndpoint - } - - if opts.MirrorInterval != "" { - parsedInterval, err := time.ParseDuration(opts.MirrorInterval) - if err != nil { - log.Error("Failed to set Interval: %v", err) - return repo, err - } - if parsedInterval == 0 { - mirrorModel.Interval = 0 - mirrorModel.NextUpdateUnix = 0 - } else if parsedInterval < setting.Mirror.MinInterval { - err := fmt.Errorf("interval %s is set below Minimum Interval of %s", parsedInterval, setting.Mirror.MinInterval) - log.Error("Interval: %s is too frequent", opts.MirrorInterval) - return repo, err - } else { - mirrorModel.Interval = parsedInterval - mirrorModel.NextUpdateUnix = timeutil.TimeStampNow().AddDuration(parsedInterval) - } - } - - if err = repo_model.InsertMirror(ctx, &mirrorModel); err != nil { - return repo, fmt.Errorf("InsertOne: %w", err) - } - - repo.IsMirror = true - if err = UpdateRepository(ctx, repo, false); err != nil { - return nil, err - } - - // this is necessary for sync local tags from remote - configName := fmt.Sprintf("remote.%s.fetch", mirrorModel.GetRemoteName()) - if stdout, _, err := git.NewCommand(ctx, "config"). - AddOptionValues("--add", configName, `+refs/tags/*:refs/tags/*`). - RunStdString(&git.RunOpts{Dir: repoPath}); err != nil { - log.Error("MigrateRepositoryGitData(git config --add +refs/tags/*:refs/tags/*) in %v: Stdout: %s\nError: %v", repo, stdout, err) - return repo, fmt.Errorf("error in MigrateRepositoryGitData(git config --add +refs/tags/*:refs/tags/*): %w", err) - } - } else { - if err = UpdateRepoSize(ctx, repo); err != nil { - log.Error("Failed to update size for repository: %v", err) - } - if repo, err = CleanUpMigrateInfo(ctx, repo); err != nil { - return nil, err - } - } - - return repo, committer.Commit() -} - // cleanUpMigrateGitConfig removes mirror info which prevents "push --all". // This also removes possible user credentials. func cleanUpMigrateGitConfig(ctx context.Context, repoPath string) error { @@ -284,116 +76,6 @@ func CleanUpMigrateInfo(ctx context.Context, repo *repo_model.Repository) (*repo return repo, UpdateRepository(ctx, repo, false) } -// SyncReleasesWithTags synchronizes release table with repository tags -func SyncReleasesWithTags(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { - log.Debug("SyncReleasesWithTags: in Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) - - // optimized procedure for pull-mirrors which saves a lot of time (in - // particular for repos with many tags). - if repo.IsMirror { - return pullMirrorReleaseSync(ctx, repo, gitRepo) - } - - existingRelTags := make(container.Set[string]) - opts := repo_model.FindReleasesOptions{ - IncludeDrafts: true, - IncludeTags: true, - ListOptions: db.ListOptions{PageSize: 50}, - RepoID: repo.ID, - } - for page := 1; ; page++ { - opts.Page = page - rels, err := db.Find[repo_model.Release](gitRepo.Ctx, opts) - if err != nil { - return fmt.Errorf("unable to GetReleasesByRepoID in Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) - } - if len(rels) == 0 { - break - } - for _, rel := range rels { - if rel.IsDraft { - continue - } - commitID, err := gitRepo.GetTagCommitID(rel.TagName) - if err != nil && !git.IsErrNotExist(err) { - return fmt.Errorf("unable to GetTagCommitID for %q in Repo[%d:%s/%s]: %w", rel.TagName, repo.ID, repo.OwnerName, repo.Name, err) - } - if git.IsErrNotExist(err) || commitID != rel.Sha1 { - if err := repo_model.PushUpdateDeleteTag(ctx, repo, rel.TagName); err != nil { - return fmt.Errorf("unable to PushUpdateDeleteTag: %q in Repo[%d:%s/%s]: %w", rel.TagName, repo.ID, repo.OwnerName, repo.Name, err) - } - } else { - existingRelTags.Add(strings.ToLower(rel.TagName)) - } - } - } - - _, err := gitRepo.WalkReferences(git.ObjectTag, 0, 0, func(sha1, refname string) error { - tagName := strings.TrimPrefix(refname, git.TagPrefix) - if existingRelTags.Contains(strings.ToLower(tagName)) { - return nil - } - - if err := PushUpdateAddTag(ctx, repo, gitRepo, tagName, sha1, refname); err != nil { - return fmt.Errorf("unable to PushUpdateAddTag: %q to Repo[%d:%s/%s]: %w", tagName, repo.ID, repo.OwnerName, repo.Name, err) - } - - return nil - }) - return err -} - -// PushUpdateAddTag must be called for any push actions to add tag -func PushUpdateAddTag(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, tagName, sha1, refname string) error { - tag, err := gitRepo.GetTagWithID(sha1, tagName) - if err != nil { - return fmt.Errorf("unable to GetTag: %w", err) - } - commit, err := tag.Commit(gitRepo) - if err != nil { - return fmt.Errorf("unable to get tag Commit: %w", err) - } - - sig := tag.Tagger - if sig == nil { - sig = commit.Author - } - if sig == nil { - sig = commit.Committer - } - - var author *user_model.User - createdAt := time.Unix(1, 0) - - if sig != nil { - author, err = user_model.GetUserByEmail(ctx, sig.Email) - if err != nil && !user_model.IsErrUserNotExist(err) { - return fmt.Errorf("unable to GetUserByEmail for %q: %w", sig.Email, err) - } - createdAt = sig.When - } - - commitsCount, err := commit.CommitsCount() - if err != nil { - return fmt.Errorf("unable to get CommitsCount: %w", err) - } - - rel := repo_model.Release{ - RepoID: repo.ID, - TagName: tagName, - LowerTagName: strings.ToLower(tagName), - Sha1: commit.ID.String(), - NumCommits: commitsCount, - CreatedUnix: timeutil.TimeStamp(createdAt.Unix()), - IsTag: true, - } - if author != nil { - rel.PublisherID = author.ID - } - - return repo_model.SaveOrUpdateTag(ctx, repo, &rel) -} - // StoreMissingLfsObjectsInRepository downloads missing LFS objects func StoreMissingLfsObjectsInRepository(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, lfsClient lfs.Client) error { contentStore := lfs.NewContentStore() @@ -491,52 +173,3 @@ func StoreMissingLfsObjectsInRepository(ctx context.Context, repo *repo_model.Re return nil } - -// pullMirrorReleaseSync is a pull-mirror specific tag<->release table -// synchronization which overwrites all Releases from the repository tags. This -// can be relied on since a pull-mirror is always identical to its -// upstream. Hence, after each sync we want the pull-mirror release set to be -// identical to the upstream tag set. This is much more efficient for -// repositories like https://github.com/vim/vim (with over 13000 tags). -func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { - log.Trace("pullMirrorReleaseSync: rebuilding releases for pull-mirror Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) - tags, numTags, err := gitRepo.GetTagInfos(0, 0) - if err != nil { - return fmt.Errorf("unable to GetTagInfos in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) - } - err = db.WithTx(ctx, func(ctx context.Context) error { - // - // clear out existing releases - // - if _, err := db.DeleteByBean(ctx, &repo_model.Release{RepoID: repo.ID}); err != nil { - return fmt.Errorf("unable to clear releases for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) - } - // - // make release set identical to upstream tags - // - for _, tag := range tags { - release := repo_model.Release{ - RepoID: repo.ID, - TagName: tag.Name, - LowerTagName: strings.ToLower(tag.Name), - Sha1: tag.Object.String(), - // NOTE: ignored, since NumCommits are unused - // for pull-mirrors (only relevant when - // displaying releases, IsTag: false) - NumCommits: -1, - CreatedUnix: timeutil.TimeStamp(tag.Tagger.When.Unix()), - IsTag: true, - } - if err := db.Insert(ctx, release); err != nil { - return fmt.Errorf("unable insert tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err) - } - } - return nil - }) - if err != nil { - return fmt.Errorf("unable to rebuild release table for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) - } - - log.Trace("pullMirrorReleaseSync: done rebuilding %d releases", numTags) - return nil -} diff --git a/services/migrations/gitea_uploader.go b/services/migrations/gitea_uploader.go index 23d855d615d7c..a6f8da2b1c63f 100644 --- a/services/migrations/gitea_uploader.go +++ b/services/migrations/gitea_uploader.go @@ -23,7 +23,6 @@ import ( "code.gitea.io/gitea/modules/label" "code.gitea.io/gitea/modules/log" base "code.gitea.io/gitea/modules/migration" - repo_module "code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/modules/structs" @@ -118,7 +117,7 @@ func (g *GiteaLocalUploader) CreateRepo(repo *base.Repository, opts base.Migrate r.DefaultBranch = repo.DefaultBranch r.Description = repo.Description - r, err = repo_module.MigrateRepositoryGitData(g.ctx, owner, r, base.MigrateOptions{ + r, err = repo_service.MigrateRepositoryGitData(g.ctx, owner, r, base.MigrateOptions{ RepoName: g.repoName, Description: repo.Description, OriginalURL: repo.OriginalURL, @@ -355,7 +354,7 @@ func (g *GiteaLocalUploader) CreateReleases(releases ...*base.Release) error { // SyncTags syncs releases with tags in the database func (g *GiteaLocalUploader) SyncTags() error { - return repo_module.SyncReleasesWithTags(g.ctx, g.repo, g.gitRepo) + return repo_service.SyncReleasesWithTags(g.ctx, g.repo, g.gitRepo) } // CreateIssues creates issues diff --git a/services/mirror/mirror_pull.go b/services/mirror/mirror_pull.go index 6f03e14ab08bd..ef54bcc13c75f 100644 --- a/services/mirror/mirror_pull.go +++ b/services/mirror/mirror_pull.go @@ -22,6 +22,7 @@ import ( "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" notify_service "code.gitea.io/gitea/services/notify" + repo_service "code.gitea.io/gitea/services/repository" ) // gitShortEmptySha Git short empty SHA @@ -312,7 +313,7 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo } log.Trace("SyncMirrors [repo: %-v]: syncing releases with tags...", m.Repo) - if err = repo_module.SyncReleasesWithTags(ctx, m.Repo, gitRepo); err != nil { + if err = repo_service.SyncReleasesWithTags(ctx, m.Repo, gitRepo); err != nil { log.Error("SyncMirrors [repo: %-v]: failed to synchronize tags to releases: %v", m.Repo, err) } diff --git a/services/repository/adopt.go b/services/repository/adopt.go index 2e9b0c822f957..ab96e1c3d00db 100644 --- a/services/repository/adopt.go +++ b/services/repository/adopt.go @@ -195,7 +195,7 @@ func adoptRepository(ctx context.Context, repoPath string, u *user_model.User, r return fmt.Errorf("updateRepository: %w", err) } - if err = repo_module.SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { + if err = SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { return fmt.Errorf("SyncReleasesWithTags: %w", err) } diff --git a/services/repository/fork.go b/services/repository/fork.go index 851a69c80f4b1..dc2f7b4af556a 100644 --- a/services/repository/fork.go +++ b/services/repository/fork.go @@ -194,7 +194,7 @@ func ForkRepository(ctx context.Context, doer, owner *user_model.User, opts Fork log.Error("Open created git repository failed: %v", err) } else { defer gitRepo.Close() - if err := repo_module.SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { + if err := SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { log.Error("Sync releases from git tags failed: %v", err) } } diff --git a/services/repository/tags.go b/services/repository/tags.go new file mode 100644 index 0000000000000..f0cdc3034302d --- /dev/null +++ b/services/repository/tags.go @@ -0,0 +1,431 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package repository + +import ( + "context" + "errors" + "fmt" + "net/http" + "strings" + "time" + + "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/models/organization" + repo_model "code.gitea.io/gitea/models/repo" + user_model "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/container" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/lfs" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/migration" + repo_module "code.gitea.io/gitea/modules/repository" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/timeutil" + "code.gitea.io/gitea/modules/util" +) + +// MigrateRepositoryGitData starts migrating git related data after created migrating repository +func MigrateRepositoryGitData(ctx context.Context, u *user_model.User, + repo *repo_model.Repository, opts migration.MigrateOptions, + httpTransport *http.Transport, +) (*repo_model.Repository, error) { + repoPath := repo_model.RepoPath(u.Name, opts.RepoName) + + if u.IsOrganization() { + t, err := organization.OrgFromUser(u).GetOwnerTeam(ctx) + if err != nil { + return nil, err + } + repo.NumWatches = t.NumMembers + } else { + repo.NumWatches = 1 + } + + migrateTimeout := time.Duration(setting.Git.Timeout.Migrate) * time.Second + + var err error + if err = util.RemoveAll(repoPath); err != nil { + return repo, fmt.Errorf("Failed to remove %s: %w", repoPath, err) + } + + if err = git.Clone(ctx, opts.CloneAddr, repoPath, git.CloneRepoOptions{ + Mirror: true, + Quiet: true, + Timeout: migrateTimeout, + SkipTLSVerify: setting.Migrations.SkipTLSVerify, + }); err != nil { + if errors.Is(err, context.DeadlineExceeded) { + return repo, fmt.Errorf("Clone timed out. Consider increasing [git.timeout] MIGRATE in app.ini. Underlying Error: %w", err) + } + return repo, fmt.Errorf("Clone: %w", err) + } + + if err := git.WriteCommitGraph(ctx, repoPath); err != nil { + return repo, err + } + + if opts.Wiki { + wikiPath := repo_model.WikiPath(u.Name, opts.RepoName) + wikiRemotePath := repo_module.WikiRemoteURL(ctx, opts.CloneAddr) + if len(wikiRemotePath) > 0 { + if err := util.RemoveAll(wikiPath); err != nil { + return repo, fmt.Errorf("Failed to remove %s: %w", wikiPath, err) + } + + if err := git.Clone(ctx, wikiRemotePath, wikiPath, git.CloneRepoOptions{ + Mirror: true, + Quiet: true, + Timeout: migrateTimeout, + Branch: "master", + SkipTLSVerify: setting.Migrations.SkipTLSVerify, + }); err != nil { + log.Warn("Clone wiki: %v", err) + if err := util.RemoveAll(wikiPath); err != nil { + return repo, fmt.Errorf("Failed to remove %s: %w", wikiPath, err) + } + } else { + if err := git.WriteCommitGraph(ctx, wikiPath); err != nil { + return repo, err + } + } + } + } + + if repo.OwnerID == u.ID { + repo.Owner = u + } + + if err = repo_module.CheckDaemonExportOK(ctx, repo); err != nil { + return repo, fmt.Errorf("checkDaemonExportOK: %w", err) + } + + if stdout, _, err := git.NewCommand(ctx, "update-server-info"). + SetDescription(fmt.Sprintf("MigrateRepositoryGitData(git update-server-info): %s", repoPath)). + RunStdString(&git.RunOpts{Dir: repoPath}); err != nil { + log.Error("MigrateRepositoryGitData(git update-server-info) in %v: Stdout: %s\nError: %v", repo, stdout, err) + return repo, fmt.Errorf("error in MigrateRepositoryGitData(git update-server-info): %w", err) + } + + gitRepo, err := git.OpenRepository(ctx, repoPath) + if err != nil { + return repo, fmt.Errorf("OpenRepository: %w", err) + } + defer gitRepo.Close() + + repo.IsEmpty, err = gitRepo.IsEmpty() + if err != nil { + return repo, fmt.Errorf("git.IsEmpty: %w", err) + } + + if !repo.IsEmpty { + if len(repo.DefaultBranch) == 0 { + // Try to get HEAD branch and set it as default branch. + headBranch, err := gitRepo.GetHEADBranch() + if err != nil { + return repo, fmt.Errorf("GetHEADBranch: %w", err) + } + if headBranch != nil { + repo.DefaultBranch = headBranch.Name + } + } + + if _, err := repo_module.SyncRepoBranchesWithRepo(ctx, repo, gitRepo, u.ID); err != nil { + return repo, fmt.Errorf("SyncRepoBranchesWithRepo: %v", err) + } + + if !opts.Releases { + // note: this will greatly improve release (tag) sync + // for pull-mirrors with many tags + repo.IsMirror = opts.Mirror + if err = SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { + log.Error("Failed to synchronize tags to releases for repository: %v", err) + } + } + + if opts.LFS { + endpoint := lfs.DetermineEndpoint(opts.CloneAddr, opts.LFSEndpoint) + lfsClient := lfs.NewClient(endpoint, httpTransport) + if err = repo_module.StoreMissingLfsObjectsInRepository(ctx, repo, gitRepo, lfsClient); err != nil { + log.Error("Failed to store missing LFS objects for repository: %v", err) + } + } + } + + ctx, committer, err := db.TxContext(ctx) + if err != nil { + return nil, err + } + defer committer.Close() + + if opts.Mirror { + remoteAddress, err := util.SanitizeURL(opts.CloneAddr) + if err != nil { + return repo, err + } + mirrorModel := repo_model.Mirror{ + RepoID: repo.ID, + Interval: setting.Mirror.DefaultInterval, + EnablePrune: true, + NextUpdateUnix: timeutil.TimeStampNow().AddDuration(setting.Mirror.DefaultInterval), + LFS: opts.LFS, + RemoteAddress: remoteAddress, + } + if opts.LFS { + mirrorModel.LFSEndpoint = opts.LFSEndpoint + } + + if opts.MirrorInterval != "" { + parsedInterval, err := time.ParseDuration(opts.MirrorInterval) + if err != nil { + log.Error("Failed to set Interval: %v", err) + return repo, err + } + if parsedInterval == 0 { + mirrorModel.Interval = 0 + mirrorModel.NextUpdateUnix = 0 + } else if parsedInterval < setting.Mirror.MinInterval { + err := fmt.Errorf("interval %s is set below Minimum Interval of %s", parsedInterval, setting.Mirror.MinInterval) + log.Error("Interval: %s is too frequent", opts.MirrorInterval) + return repo, err + } else { + mirrorModel.Interval = parsedInterval + mirrorModel.NextUpdateUnix = timeutil.TimeStampNow().AddDuration(parsedInterval) + } + } + + if err = repo_model.InsertMirror(ctx, &mirrorModel); err != nil { + return repo, fmt.Errorf("InsertOne: %w", err) + } + + repo.IsMirror = true + if err = UpdateRepository(ctx, repo, false); err != nil { + return nil, err + } + + // this is necessary for sync local tags from remote + configName := fmt.Sprintf("remote.%s.fetch", mirrorModel.GetRemoteName()) + if stdout, _, err := git.NewCommand(ctx, "config"). + AddOptionValues("--add", configName, `+refs/tags/*:refs/tags/*`). + RunStdString(&git.RunOpts{Dir: repoPath}); err != nil { + log.Error("MigrateRepositoryGitData(git config --add +refs/tags/*:refs/tags/*) in %v: Stdout: %s\nError: %v", repo, stdout, err) + return repo, fmt.Errorf("error in MigrateRepositoryGitData(git config --add +refs/tags/*:refs/tags/*): %w", err) + } + } else { + if err = repo_module.UpdateRepoSize(ctx, repo); err != nil { + log.Error("Failed to update size for repository: %v", err) + } + if repo, err = repo_module.CleanUpMigrateInfo(ctx, repo); err != nil { + return nil, err + } + } + + return repo, committer.Commit() +} + +// SyncReleasesWithTags synchronizes release table with repository tags +func SyncReleasesWithTags(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { + log.Debug("SyncReleasesWithTags: in Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) + + // optimized procedure for pull-mirrors which saves a lot of time (in + // particular for repos with many tags). + if repo.IsMirror { + return pullMirrorReleaseSync(ctx, repo, gitRepo) + } + + existingRelTags := make(container.Set[string]) + opts := repo_model.FindReleasesOptions{ + IncludeDrafts: true, + IncludeTags: true, + ListOptions: db.ListOptions{PageSize: 50}, + RepoID: repo.ID, + } + for page := 1; ; page++ { + opts.Page = page + rels, err := db.Find[repo_model.Release](gitRepo.Ctx, opts) + if err != nil { + return fmt.Errorf("unable to GetReleasesByRepoID in Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + if len(rels) == 0 { + break + } + for _, rel := range rels { + if rel.IsDraft { + continue + } + commitID, err := gitRepo.GetTagCommitID(rel.TagName) + if err != nil && !git.IsErrNotExist(err) { + return fmt.Errorf("unable to GetTagCommitID for %q in Repo[%d:%s/%s]: %w", rel.TagName, repo.ID, repo.OwnerName, repo.Name, err) + } + if git.IsErrNotExist(err) || commitID != rel.Sha1 { + if err := repo_model.PushUpdateDeleteTag(ctx, repo, rel.TagName); err != nil { + return fmt.Errorf("unable to PushUpdateDeleteTag: %q in Repo[%d:%s/%s]: %w", rel.TagName, repo.ID, repo.OwnerName, repo.Name, err) + } + } else { + existingRelTags.Add(strings.ToLower(rel.TagName)) + } + } + } + + _, err := gitRepo.WalkReferences(git.ObjectTag, 0, 0, func(sha1, refname string) error { + tagName := strings.TrimPrefix(refname, git.TagPrefix) + if existingRelTags.Contains(strings.ToLower(tagName)) { + return nil + } + + if err := PushUpdateAddTag(ctx, repo, gitRepo, tagName, sha1, refname); err != nil { + return fmt.Errorf("unable to PushUpdateAddTag: %q to Repo[%d:%s/%s]: %w", tagName, repo.ID, repo.OwnerName, repo.Name, err) + } + + return nil + }) + return err +} + +// PushUpdateAddTag must be called for any push actions to add tag +func PushUpdateAddTag(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, tagName, sha1, refname string) error { + tag, err := gitRepo.GetTagWithID(sha1, tagName) + if err != nil { + return fmt.Errorf("unable to GetTag: %w", err) + } + commit, err := tag.Commit(gitRepo) + if err != nil { + return fmt.Errorf("unable to get tag Commit: %w", err) + } + + sig := tag.Tagger + if sig == nil { + sig = commit.Author + } + if sig == nil { + sig = commit.Committer + } + + var author *user_model.User + createdAt := time.Unix(1, 0) + + if sig != nil { + author, err = user_model.GetUserByEmail(ctx, sig.Email) + if err != nil && !user_model.IsErrUserNotExist(err) { + return fmt.Errorf("unable to GetUserByEmail for %q: %w", sig.Email, err) + } + createdAt = sig.When + } + + commitsCount, err := commit.CommitsCount() + if err != nil { + return fmt.Errorf("unable to get CommitsCount: %w", err) + } + + rel := repo_model.Release{ + RepoID: repo.ID, + TagName: tagName, + LowerTagName: strings.ToLower(tagName), + Sha1: commit.ID.String(), + NumCommits: commitsCount, + CreatedUnix: timeutil.TimeStamp(createdAt.Unix()), + IsTag: true, + } + if author != nil { + rel.PublisherID = author.ID + } + + return repo_model.SaveOrUpdateTag(ctx, repo, &rel) +} + +func calcSync(destTags []*git.Tag, dbTags []*repo_model.Release) ([]*git.Tag, []int64, []*git.Tag) { + destTagMap := make(map[string]*git.Tag) + for _, tag := range destTags { + destTagMap[tag.Name] = tag + } + dbTagMap := make(map[string]*repo_model.Release) + for _, rel := range dbTags { + dbTagMap[rel.TagName] = rel + } + + inserted := make([]*git.Tag, 0, 10) + updated := make([]*git.Tag, 0, 10) + for _, tag := range destTags { + rel := dbTagMap[tag.Name] + if rel != nil { + inserted = append(inserted, tag) + } else if rel.Sha1 != tag.Object.String() { + updated = append(updated, tag) + } + } + deleted := make([]int64, 0, 10) + for _, tag := range dbTags { + if destTagMap[tag.TagName] == nil && tag.IsTag { + deleted = append(deleted, tag.ID) + } + } + return inserted, deleted, updated +} + +// pullMirrorReleaseSync is a pull-mirror specific tag<->release table +// synchronization which overwrites all Releases from the repository tags. This +// can be relied on since a pull-mirror is always identical to its +// upstream. Hence, after each sync we want the pull-mirror release set to be +// identical to the upstream tag set. This is much more efficient for +// repositories like https://github.com/vim/vim (with over 13000 tags). +func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { + log.Trace("pullMirrorReleaseSync: rebuilding releases for pull-mirror Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) + tags, numTags, err := gitRepo.GetTagInfos(0, 0) + if err != nil { + return fmt.Errorf("unable to GetTagInfos in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + err = db.WithTx(ctx, func(ctx context.Context) error { + dbReleases, err := db.Find[repo_model.Release](ctx, repo_model.FindReleasesOptions{ + RepoID: repo.ID, + }) + if err != nil { + return fmt.Errorf("unable to FindReleases in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + + inserts, deletes, updates := calcSync(tags, dbReleases) + // + // make release set identical to upstream tags + // + for _, tag := range inserts { + release := repo_model.Release{ + RepoID: repo.ID, + TagName: tag.Name, + LowerTagName: strings.ToLower(tag.Name), + Sha1: tag.Object.String(), + // NOTE: ignored, since NumCommits are unused + // for pull-mirrors (only relevant when + // displaying releases, IsTag: false) + NumCommits: -1, + CreatedUnix: timeutil.TimeStamp(tag.Tagger.When.Unix()), + IsTag: true, + } + if err := db.Insert(ctx, release); err != nil { + return fmt.Errorf("unable insert tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err) + } + } + + // only delete tags releases + if _, err := db.GetEngine(ctx).Where("repo_id=?", repo.ID). + In("id", deletes).Delete(&repo_model.Release{}); err != nil { + return fmt.Errorf("unable to delete tags for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + + for _, tag := range updates { + if _, err := db.GetEngine(ctx).Where("repo_id = ? AND lower_tag_name = ?", repo.ID, strings.ToLower(tag.Name)). + Cols("sha1"). + Update(&repo_model.Release{ + Sha1: tag.Object.String(), + }); err != nil { + return fmt.Errorf("unable to update tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err) + } + } + return nil + }) + if err != nil { + return fmt.Errorf("unable to rebuild release table for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + + log.Trace("pullMirrorReleaseSync: done rebuilding %d releases", numTags) + return nil +} diff --git a/tests/integration/mirror_pull_test.go b/tests/integration/mirror_pull_test.go index 1e0edd9a2dd07..bc0c7599a4c39 100644 --- a/tests/integration/mirror_pull_test.go +++ b/tests/integration/mirror_pull_test.go @@ -13,7 +13,6 @@ import ( user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/migration" - "code.gitea.io/gitea/modules/repository" mirror_service "code.gitea.io/gitea/services/mirror" release_service "code.gitea.io/gitea/services/release" repo_service "code.gitea.io/gitea/services/repository" @@ -51,7 +50,7 @@ func TestMirrorPull(t *testing.T) { ctx := context.Background() - mirror, err := repository.MigrateRepositoryGitData(ctx, user, mirrorRepo, opts, nil) + mirror, err := repo_service.MigrateRepositoryGitData(ctx, user, mirrorRepo, opts, nil) assert.NoError(t, err) gitRepo, err := git.OpenRepository(git.DefaultContext, repoPath) From bbdb7df30248e7d4a26a909c8d2598a152e13868 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 17 Jan 2024 00:41:23 +0800 Subject: [PATCH 2/7] Fix bug --- services/repository/tags.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/repository/tags.go b/services/repository/tags.go index f0cdc3034302d..151a331e6d1cc 100644 --- a/services/repository/tags.go +++ b/services/repository/tags.go @@ -348,7 +348,7 @@ func calcSync(destTags []*git.Tag, dbTags []*repo_model.Release) ([]*git.Tag, [] updated := make([]*git.Tag, 0, 10) for _, tag := range destTags { rel := dbTagMap[tag.Name] - if rel != nil { + if rel == nil { inserted = append(inserted, tag) } else if rel.Sha1 != tag.Object.String() { updated = append(updated, tag) From 53ab18dcecf4152b58328d1f47429510eb414d50 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 17 Jan 2024 09:47:19 +0800 Subject: [PATCH 3/7] revert refactor change --- cmd/admin.go | 4 +- modules/repository/repo.go | 417 +++++++++++++++++++++++++ services/migrations/gitea_uploader.go | 5 +- services/mirror/mirror_pull.go | 3 +- services/repository/adopt.go | 2 +- services/repository/fork.go | 2 +- services/repository/tags.go | 431 -------------------------- tests/integration/mirror_pull_test.go | 3 +- 8 files changed, 427 insertions(+), 440 deletions(-) delete mode 100644 services/repository/tags.go diff --git a/cmd/admin.go b/cmd/admin.go index a5233750e5101..74bfa5a6c6703 100644 --- a/cmd/admin.go +++ b/cmd/admin.go @@ -12,7 +12,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" - repo_service "code.gitea.io/gitea/services/repository" + repo_module "code.gitea.io/gitea/modules/repository" "github.com/urfave/cli/v2" ) @@ -134,7 +134,7 @@ func runRepoSyncReleases(_ *cli.Context) error { } log.Trace(" currentNumReleases is %d, running SyncReleasesWithTags", oldnum) - if err = repo_service.SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { + if err = repo_module.SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { log.Warn(" SyncReleasesWithTags: %v", err) gitRepo.Close() continue diff --git a/modules/repository/repo.go b/modules/repository/repo.go index fc33b502856f3..95e6b1aadb6df 100644 --- a/modules/repository/repo.go +++ b/modules/repository/repo.go @@ -5,16 +5,26 @@ package repository import ( "context" + "errors" "fmt" "io" + "net/http" "strings" + "time" + "code.gitea.io/gitea/models/db" git_model "code.gitea.io/gitea/models/git" + "code.gitea.io/gitea/models/organization" repo_model "code.gitea.io/gitea/models/repo" + user_model "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/migration" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/timeutil" + "code.gitea.io/gitea/modules/util" ) /* @@ -36,6 +46,204 @@ func WikiRemoteURL(ctx context.Context, remote string) string { return "" } +// MigrateRepositoryGitData starts migrating git related data after created migrating repository +func MigrateRepositoryGitData(ctx context.Context, u *user_model.User, + repo *repo_model.Repository, opts migration.MigrateOptions, + httpTransport *http.Transport, +) (*repo_model.Repository, error) { + repoPath := repo_model.RepoPath(u.Name, opts.RepoName) + + if u.IsOrganization() { + t, err := organization.OrgFromUser(u).GetOwnerTeam(ctx) + if err != nil { + return nil, err + } + repo.NumWatches = t.NumMembers + } else { + repo.NumWatches = 1 + } + + migrateTimeout := time.Duration(setting.Git.Timeout.Migrate) * time.Second + + var err error + if err = util.RemoveAll(repoPath); err != nil { + return repo, fmt.Errorf("Failed to remove %s: %w", repoPath, err) + } + + if err = git.Clone(ctx, opts.CloneAddr, repoPath, git.CloneRepoOptions{ + Mirror: true, + Quiet: true, + Timeout: migrateTimeout, + SkipTLSVerify: setting.Migrations.SkipTLSVerify, + }); err != nil { + if errors.Is(err, context.DeadlineExceeded) { + return repo, fmt.Errorf("Clone timed out. Consider increasing [git.timeout] MIGRATE in app.ini. Underlying Error: %w", err) + } + return repo, fmt.Errorf("Clone: %w", err) + } + + if err := git.WriteCommitGraph(ctx, repoPath); err != nil { + return repo, err + } + + if opts.Wiki { + wikiPath := repo_model.WikiPath(u.Name, opts.RepoName) + wikiRemotePath := WikiRemoteURL(ctx, opts.CloneAddr) + if len(wikiRemotePath) > 0 { + if err := util.RemoveAll(wikiPath); err != nil { + return repo, fmt.Errorf("Failed to remove %s: %w", wikiPath, err) + } + + if err := git.Clone(ctx, wikiRemotePath, wikiPath, git.CloneRepoOptions{ + Mirror: true, + Quiet: true, + Timeout: migrateTimeout, + Branch: "master", + SkipTLSVerify: setting.Migrations.SkipTLSVerify, + }); err != nil { + log.Warn("Clone wiki: %v", err) + if err := util.RemoveAll(wikiPath); err != nil { + return repo, fmt.Errorf("Failed to remove %s: %w", wikiPath, err) + } + } else { + if err := git.WriteCommitGraph(ctx, wikiPath); err != nil { + return repo, err + } + } + } + } + + if repo.OwnerID == u.ID { + repo.Owner = u + } + + if err = CheckDaemonExportOK(ctx, repo); err != nil { + return repo, fmt.Errorf("checkDaemonExportOK: %w", err) + } + + if stdout, _, err := git.NewCommand(ctx, "update-server-info"). + SetDescription(fmt.Sprintf("MigrateRepositoryGitData(git update-server-info): %s", repoPath)). + RunStdString(&git.RunOpts{Dir: repoPath}); err != nil { + log.Error("MigrateRepositoryGitData(git update-server-info) in %v: Stdout: %s\nError: %v", repo, stdout, err) + return repo, fmt.Errorf("error in MigrateRepositoryGitData(git update-server-info): %w", err) + } + + gitRepo, err := git.OpenRepository(ctx, repoPath) + if err != nil { + return repo, fmt.Errorf("OpenRepository: %w", err) + } + defer gitRepo.Close() + + repo.IsEmpty, err = gitRepo.IsEmpty() + if err != nil { + return repo, fmt.Errorf("git.IsEmpty: %w", err) + } + + if !repo.IsEmpty { + if len(repo.DefaultBranch) == 0 { + // Try to get HEAD branch and set it as default branch. + headBranch, err := gitRepo.GetHEADBranch() + if err != nil { + return repo, fmt.Errorf("GetHEADBranch: %w", err) + } + if headBranch != nil { + repo.DefaultBranch = headBranch.Name + } + } + + if _, err := SyncRepoBranchesWithRepo(ctx, repo, gitRepo, u.ID); err != nil { + return repo, fmt.Errorf("SyncRepoBranchesWithRepo: %v", err) + } + + if !opts.Releases { + // note: this will greatly improve release (tag) sync + // for pull-mirrors with many tags + repo.IsMirror = opts.Mirror + if err = SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { + log.Error("Failed to synchronize tags to releases for repository: %v", err) + } + } + + if opts.LFS { + endpoint := lfs.DetermineEndpoint(opts.CloneAddr, opts.LFSEndpoint) + lfsClient := lfs.NewClient(endpoint, httpTransport) + if err = StoreMissingLfsObjectsInRepository(ctx, repo, gitRepo, lfsClient); err != nil { + log.Error("Failed to store missing LFS objects for repository: %v", err) + } + } + } + + ctx, committer, err := db.TxContext(ctx) + if err != nil { + return nil, err + } + defer committer.Close() + + if opts.Mirror { + remoteAddress, err := util.SanitizeURL(opts.CloneAddr) + if err != nil { + return repo, err + } + mirrorModel := repo_model.Mirror{ + RepoID: repo.ID, + Interval: setting.Mirror.DefaultInterval, + EnablePrune: true, + NextUpdateUnix: timeutil.TimeStampNow().AddDuration(setting.Mirror.DefaultInterval), + LFS: opts.LFS, + RemoteAddress: remoteAddress, + } + if opts.LFS { + mirrorModel.LFSEndpoint = opts.LFSEndpoint + } + + if opts.MirrorInterval != "" { + parsedInterval, err := time.ParseDuration(opts.MirrorInterval) + if err != nil { + log.Error("Failed to set Interval: %v", err) + return repo, err + } + if parsedInterval == 0 { + mirrorModel.Interval = 0 + mirrorModel.NextUpdateUnix = 0 + } else if parsedInterval < setting.Mirror.MinInterval { + err := fmt.Errorf("interval %s is set below Minimum Interval of %s", parsedInterval, setting.Mirror.MinInterval) + log.Error("Interval: %s is too frequent", opts.MirrorInterval) + return repo, err + } else { + mirrorModel.Interval = parsedInterval + mirrorModel.NextUpdateUnix = timeutil.TimeStampNow().AddDuration(parsedInterval) + } + } + + if err = repo_model.InsertMirror(ctx, &mirrorModel); err != nil { + return repo, fmt.Errorf("InsertOne: %w", err) + } + + repo.IsMirror = true + if err = UpdateRepository(ctx, repo, false); err != nil { + return nil, err + } + + // this is necessary for sync local tags from remote + configName := fmt.Sprintf("remote.%s.fetch", mirrorModel.GetRemoteName()) + if stdout, _, err := git.NewCommand(ctx, "config"). + AddOptionValues("--add", configName, `+refs/tags/*:refs/tags/*`). + RunStdString(&git.RunOpts{Dir: repoPath}); err != nil { + log.Error("MigrateRepositoryGitData(git config --add +refs/tags/*:refs/tags/*) in %v: Stdout: %s\nError: %v", repo, stdout, err) + return repo, fmt.Errorf("error in MigrateRepositoryGitData(git config --add +refs/tags/*:refs/tags/*): %w", err) + } + } else { + if err = UpdateRepoSize(ctx, repo); err != nil { + log.Error("Failed to update size for repository: %v", err) + } + if repo, err = CleanUpMigrateInfo(ctx, repo); err != nil { + return nil, err + } + } + + return repo, committer.Commit() +} + // cleanUpMigrateGitConfig removes mirror info which prevents "push --all". // This also removes possible user credentials. func cleanUpMigrateGitConfig(ctx context.Context, repoPath string) error { @@ -76,6 +284,116 @@ func CleanUpMigrateInfo(ctx context.Context, repo *repo_model.Repository) (*repo return repo, UpdateRepository(ctx, repo, false) } +// SyncReleasesWithTags synchronizes release table with repository tags +func SyncReleasesWithTags(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { + log.Debug("SyncReleasesWithTags: in Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) + + // optimized procedure for pull-mirrors which saves a lot of time (in + // particular for repos with many tags). + if repo.IsMirror { + return pullMirrorReleaseSync(ctx, repo, gitRepo) + } + + existingRelTags := make(container.Set[string]) + opts := repo_model.FindReleasesOptions{ + IncludeDrafts: true, + IncludeTags: true, + ListOptions: db.ListOptions{PageSize: 50}, + RepoID: repo.ID, + } + for page := 1; ; page++ { + opts.Page = page + rels, err := db.Find[repo_model.Release](gitRepo.Ctx, opts) + if err != nil { + return fmt.Errorf("unable to GetReleasesByRepoID in Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + if len(rels) == 0 { + break + } + for _, rel := range rels { + if rel.IsDraft { + continue + } + commitID, err := gitRepo.GetTagCommitID(rel.TagName) + if err != nil && !git.IsErrNotExist(err) { + return fmt.Errorf("unable to GetTagCommitID for %q in Repo[%d:%s/%s]: %w", rel.TagName, repo.ID, repo.OwnerName, repo.Name, err) + } + if git.IsErrNotExist(err) || commitID != rel.Sha1 { + if err := repo_model.PushUpdateDeleteTag(ctx, repo, rel.TagName); err != nil { + return fmt.Errorf("unable to PushUpdateDeleteTag: %q in Repo[%d:%s/%s]: %w", rel.TagName, repo.ID, repo.OwnerName, repo.Name, err) + } + } else { + existingRelTags.Add(strings.ToLower(rel.TagName)) + } + } + } + + _, err := gitRepo.WalkReferences(git.ObjectTag, 0, 0, func(sha1, refname string) error { + tagName := strings.TrimPrefix(refname, git.TagPrefix) + if existingRelTags.Contains(strings.ToLower(tagName)) { + return nil + } + + if err := PushUpdateAddTag(ctx, repo, gitRepo, tagName, sha1, refname); err != nil { + return fmt.Errorf("unable to PushUpdateAddTag: %q to Repo[%d:%s/%s]: %w", tagName, repo.ID, repo.OwnerName, repo.Name, err) + } + + return nil + }) + return err +} + +// PushUpdateAddTag must be called for any push actions to add tag +func PushUpdateAddTag(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, tagName, sha1, refname string) error { + tag, err := gitRepo.GetTagWithID(sha1, tagName) + if err != nil { + return fmt.Errorf("unable to GetTag: %w", err) + } + commit, err := tag.Commit(gitRepo) + if err != nil { + return fmt.Errorf("unable to get tag Commit: %w", err) + } + + sig := tag.Tagger + if sig == nil { + sig = commit.Author + } + if sig == nil { + sig = commit.Committer + } + + var author *user_model.User + createdAt := time.Unix(1, 0) + + if sig != nil { + author, err = user_model.GetUserByEmail(ctx, sig.Email) + if err != nil && !user_model.IsErrUserNotExist(err) { + return fmt.Errorf("unable to GetUserByEmail for %q: %w", sig.Email, err) + } + createdAt = sig.When + } + + commitsCount, err := commit.CommitsCount() + if err != nil { + return fmt.Errorf("unable to get CommitsCount: %w", err) + } + + rel := repo_model.Release{ + RepoID: repo.ID, + TagName: tagName, + LowerTagName: strings.ToLower(tagName), + Sha1: commit.ID.String(), + NumCommits: commitsCount, + CreatedUnix: timeutil.TimeStamp(createdAt.Unix()), + IsTag: true, + } + if author != nil { + rel.PublisherID = author.ID + } + + return repo_model.SaveOrUpdateTag(ctx, repo, &rel) +} + // StoreMissingLfsObjectsInRepository downloads missing LFS objects func StoreMissingLfsObjectsInRepository(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, lfsClient lfs.Client) error { contentStore := lfs.NewContentStore() @@ -173,3 +491,102 @@ func StoreMissingLfsObjectsInRepository(ctx context.Context, repo *repo_model.Re return nil } + +// pullMirrorReleaseSync is a pull-mirror specific tag<->release table +// synchronization which overwrites all Releases from the repository tags. This +// can be relied on since a pull-mirror is always identical to its +// upstream. Hence, after each sync we want the pull-mirror release set to be +// identical to the upstream tag set. This is much more efficient for +// repositories like https://github.com/vim/vim (with over 13000 tags). +func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { + log.Trace("pullMirrorReleaseSync: rebuilding releases for pull-mirror Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) + tags, numTags, err := gitRepo.GetTagInfos(0, 0) + if err != nil { + return fmt.Errorf("unable to GetTagInfos in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + err = db.WithTx(ctx, func(ctx context.Context) error { + dbReleases, err := db.Find[repo_model.Release](ctx, repo_model.FindReleasesOptions{ + RepoID: repo.ID, + }) + if err != nil { + return fmt.Errorf("unable to FindReleases in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + + inserts, deletes, updates := calcSync(tags, dbReleases) + // + // make release set identical to upstream tags + // + for _, tag := range inserts { + release := repo_model.Release{ + RepoID: repo.ID, + TagName: tag.Name, + LowerTagName: strings.ToLower(tag.Name), + Sha1: tag.Object.String(), + // NOTE: ignored, since NumCommits are unused + // for pull-mirrors (only relevant when + // displaying releases, IsTag: false) + NumCommits: -1, + CreatedUnix: timeutil.TimeStamp(tag.Tagger.When.Unix()), + IsTag: true, + } + if err := db.Insert(ctx, release); err != nil { + return fmt.Errorf("unable insert tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err) + } + } + + // only delete tags releases + if len(deletes) > 0 { + if _, err := db.GetEngine(ctx).Where("repo_id=?", repo.ID). + In("id", deletes). + Delete(&repo_model.Release{}); err != nil { + return fmt.Errorf("unable to delete tags for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + } + + for _, tag := range updates { + if _, err := db.GetEngine(ctx).Where("repo_id = ? AND lower_tag_name = ?", repo.ID, strings.ToLower(tag.Name)). + Cols("sha1"). + Update(&repo_model.Release{ + Sha1: tag.Object.String(), + }); err != nil { + return fmt.Errorf("unable to update tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err) + } + } + return nil + }) + if err != nil { + return fmt.Errorf("unable to rebuild release table for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + + log.Trace("pullMirrorReleaseSync: done rebuilding %d releases", numTags) + return nil +} + +func calcSync(destTags []*git.Tag, dbTags []*repo_model.Release) ([]*git.Tag, []int64, []*git.Tag) { + destTagMap := make(map[string]*git.Tag) + for _, tag := range destTags { + destTagMap[tag.Name] = tag + } + dbTagMap := make(map[string]*repo_model.Release) + for _, rel := range dbTags { + dbTagMap[rel.TagName] = rel + } + + inserted := make([]*git.Tag, 0, 10) + updated := make([]*git.Tag, 0, 10) + for _, tag := range destTags { + rel := dbTagMap[tag.Name] + if rel == nil { + inserted = append(inserted, tag) + } else if rel.Sha1 != tag.Object.String() { + updated = append(updated, tag) + } + } + deleted := make([]int64, 0, 10) + for _, tag := range dbTags { + if destTagMap[tag.TagName] == nil && tag.IsTag { + deleted = append(deleted, tag.ID) + } + } + return inserted, deleted, updated +} diff --git a/services/migrations/gitea_uploader.go b/services/migrations/gitea_uploader.go index a6f8da2b1c63f..23d855d615d7c 100644 --- a/services/migrations/gitea_uploader.go +++ b/services/migrations/gitea_uploader.go @@ -23,6 +23,7 @@ import ( "code.gitea.io/gitea/modules/label" "code.gitea.io/gitea/modules/log" base "code.gitea.io/gitea/modules/migration" + repo_module "code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/modules/structs" @@ -117,7 +118,7 @@ func (g *GiteaLocalUploader) CreateRepo(repo *base.Repository, opts base.Migrate r.DefaultBranch = repo.DefaultBranch r.Description = repo.Description - r, err = repo_service.MigrateRepositoryGitData(g.ctx, owner, r, base.MigrateOptions{ + r, err = repo_module.MigrateRepositoryGitData(g.ctx, owner, r, base.MigrateOptions{ RepoName: g.repoName, Description: repo.Description, OriginalURL: repo.OriginalURL, @@ -354,7 +355,7 @@ func (g *GiteaLocalUploader) CreateReleases(releases ...*base.Release) error { // SyncTags syncs releases with tags in the database func (g *GiteaLocalUploader) SyncTags() error { - return repo_service.SyncReleasesWithTags(g.ctx, g.repo, g.gitRepo) + return repo_module.SyncReleasesWithTags(g.ctx, g.repo, g.gitRepo) } // CreateIssues creates issues diff --git a/services/mirror/mirror_pull.go b/services/mirror/mirror_pull.go index ef54bcc13c75f..6f03e14ab08bd 100644 --- a/services/mirror/mirror_pull.go +++ b/services/mirror/mirror_pull.go @@ -22,7 +22,6 @@ import ( "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" notify_service "code.gitea.io/gitea/services/notify" - repo_service "code.gitea.io/gitea/services/repository" ) // gitShortEmptySha Git short empty SHA @@ -313,7 +312,7 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo } log.Trace("SyncMirrors [repo: %-v]: syncing releases with tags...", m.Repo) - if err = repo_service.SyncReleasesWithTags(ctx, m.Repo, gitRepo); err != nil { + if err = repo_module.SyncReleasesWithTags(ctx, m.Repo, gitRepo); err != nil { log.Error("SyncMirrors [repo: %-v]: failed to synchronize tags to releases: %v", m.Repo, err) } diff --git a/services/repository/adopt.go b/services/repository/adopt.go index ab96e1c3d00db..2e9b0c822f957 100644 --- a/services/repository/adopt.go +++ b/services/repository/adopt.go @@ -195,7 +195,7 @@ func adoptRepository(ctx context.Context, repoPath string, u *user_model.User, r return fmt.Errorf("updateRepository: %w", err) } - if err = SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { + if err = repo_module.SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { return fmt.Errorf("SyncReleasesWithTags: %w", err) } diff --git a/services/repository/fork.go b/services/repository/fork.go index dc2f7b4af556a..851a69c80f4b1 100644 --- a/services/repository/fork.go +++ b/services/repository/fork.go @@ -194,7 +194,7 @@ func ForkRepository(ctx context.Context, doer, owner *user_model.User, opts Fork log.Error("Open created git repository failed: %v", err) } else { defer gitRepo.Close() - if err := SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { + if err := repo_module.SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { log.Error("Sync releases from git tags failed: %v", err) } } diff --git a/services/repository/tags.go b/services/repository/tags.go deleted file mode 100644 index 151a331e6d1cc..0000000000000 --- a/services/repository/tags.go +++ /dev/null @@ -1,431 +0,0 @@ -// Copyright 2024 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package repository - -import ( - "context" - "errors" - "fmt" - "net/http" - "strings" - "time" - - "code.gitea.io/gitea/models/db" - "code.gitea.io/gitea/models/organization" - repo_model "code.gitea.io/gitea/models/repo" - user_model "code.gitea.io/gitea/models/user" - "code.gitea.io/gitea/modules/container" - "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/lfs" - "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/migration" - repo_module "code.gitea.io/gitea/modules/repository" - "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/timeutil" - "code.gitea.io/gitea/modules/util" -) - -// MigrateRepositoryGitData starts migrating git related data after created migrating repository -func MigrateRepositoryGitData(ctx context.Context, u *user_model.User, - repo *repo_model.Repository, opts migration.MigrateOptions, - httpTransport *http.Transport, -) (*repo_model.Repository, error) { - repoPath := repo_model.RepoPath(u.Name, opts.RepoName) - - if u.IsOrganization() { - t, err := organization.OrgFromUser(u).GetOwnerTeam(ctx) - if err != nil { - return nil, err - } - repo.NumWatches = t.NumMembers - } else { - repo.NumWatches = 1 - } - - migrateTimeout := time.Duration(setting.Git.Timeout.Migrate) * time.Second - - var err error - if err = util.RemoveAll(repoPath); err != nil { - return repo, fmt.Errorf("Failed to remove %s: %w", repoPath, err) - } - - if err = git.Clone(ctx, opts.CloneAddr, repoPath, git.CloneRepoOptions{ - Mirror: true, - Quiet: true, - Timeout: migrateTimeout, - SkipTLSVerify: setting.Migrations.SkipTLSVerify, - }); err != nil { - if errors.Is(err, context.DeadlineExceeded) { - return repo, fmt.Errorf("Clone timed out. Consider increasing [git.timeout] MIGRATE in app.ini. Underlying Error: %w", err) - } - return repo, fmt.Errorf("Clone: %w", err) - } - - if err := git.WriteCommitGraph(ctx, repoPath); err != nil { - return repo, err - } - - if opts.Wiki { - wikiPath := repo_model.WikiPath(u.Name, opts.RepoName) - wikiRemotePath := repo_module.WikiRemoteURL(ctx, opts.CloneAddr) - if len(wikiRemotePath) > 0 { - if err := util.RemoveAll(wikiPath); err != nil { - return repo, fmt.Errorf("Failed to remove %s: %w", wikiPath, err) - } - - if err := git.Clone(ctx, wikiRemotePath, wikiPath, git.CloneRepoOptions{ - Mirror: true, - Quiet: true, - Timeout: migrateTimeout, - Branch: "master", - SkipTLSVerify: setting.Migrations.SkipTLSVerify, - }); err != nil { - log.Warn("Clone wiki: %v", err) - if err := util.RemoveAll(wikiPath); err != nil { - return repo, fmt.Errorf("Failed to remove %s: %w", wikiPath, err) - } - } else { - if err := git.WriteCommitGraph(ctx, wikiPath); err != nil { - return repo, err - } - } - } - } - - if repo.OwnerID == u.ID { - repo.Owner = u - } - - if err = repo_module.CheckDaemonExportOK(ctx, repo); err != nil { - return repo, fmt.Errorf("checkDaemonExportOK: %w", err) - } - - if stdout, _, err := git.NewCommand(ctx, "update-server-info"). - SetDescription(fmt.Sprintf("MigrateRepositoryGitData(git update-server-info): %s", repoPath)). - RunStdString(&git.RunOpts{Dir: repoPath}); err != nil { - log.Error("MigrateRepositoryGitData(git update-server-info) in %v: Stdout: %s\nError: %v", repo, stdout, err) - return repo, fmt.Errorf("error in MigrateRepositoryGitData(git update-server-info): %w", err) - } - - gitRepo, err := git.OpenRepository(ctx, repoPath) - if err != nil { - return repo, fmt.Errorf("OpenRepository: %w", err) - } - defer gitRepo.Close() - - repo.IsEmpty, err = gitRepo.IsEmpty() - if err != nil { - return repo, fmt.Errorf("git.IsEmpty: %w", err) - } - - if !repo.IsEmpty { - if len(repo.DefaultBranch) == 0 { - // Try to get HEAD branch and set it as default branch. - headBranch, err := gitRepo.GetHEADBranch() - if err != nil { - return repo, fmt.Errorf("GetHEADBranch: %w", err) - } - if headBranch != nil { - repo.DefaultBranch = headBranch.Name - } - } - - if _, err := repo_module.SyncRepoBranchesWithRepo(ctx, repo, gitRepo, u.ID); err != nil { - return repo, fmt.Errorf("SyncRepoBranchesWithRepo: %v", err) - } - - if !opts.Releases { - // note: this will greatly improve release (tag) sync - // for pull-mirrors with many tags - repo.IsMirror = opts.Mirror - if err = SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { - log.Error("Failed to synchronize tags to releases for repository: %v", err) - } - } - - if opts.LFS { - endpoint := lfs.DetermineEndpoint(opts.CloneAddr, opts.LFSEndpoint) - lfsClient := lfs.NewClient(endpoint, httpTransport) - if err = repo_module.StoreMissingLfsObjectsInRepository(ctx, repo, gitRepo, lfsClient); err != nil { - log.Error("Failed to store missing LFS objects for repository: %v", err) - } - } - } - - ctx, committer, err := db.TxContext(ctx) - if err != nil { - return nil, err - } - defer committer.Close() - - if opts.Mirror { - remoteAddress, err := util.SanitizeURL(opts.CloneAddr) - if err != nil { - return repo, err - } - mirrorModel := repo_model.Mirror{ - RepoID: repo.ID, - Interval: setting.Mirror.DefaultInterval, - EnablePrune: true, - NextUpdateUnix: timeutil.TimeStampNow().AddDuration(setting.Mirror.DefaultInterval), - LFS: opts.LFS, - RemoteAddress: remoteAddress, - } - if opts.LFS { - mirrorModel.LFSEndpoint = opts.LFSEndpoint - } - - if opts.MirrorInterval != "" { - parsedInterval, err := time.ParseDuration(opts.MirrorInterval) - if err != nil { - log.Error("Failed to set Interval: %v", err) - return repo, err - } - if parsedInterval == 0 { - mirrorModel.Interval = 0 - mirrorModel.NextUpdateUnix = 0 - } else if parsedInterval < setting.Mirror.MinInterval { - err := fmt.Errorf("interval %s is set below Minimum Interval of %s", parsedInterval, setting.Mirror.MinInterval) - log.Error("Interval: %s is too frequent", opts.MirrorInterval) - return repo, err - } else { - mirrorModel.Interval = parsedInterval - mirrorModel.NextUpdateUnix = timeutil.TimeStampNow().AddDuration(parsedInterval) - } - } - - if err = repo_model.InsertMirror(ctx, &mirrorModel); err != nil { - return repo, fmt.Errorf("InsertOne: %w", err) - } - - repo.IsMirror = true - if err = UpdateRepository(ctx, repo, false); err != nil { - return nil, err - } - - // this is necessary for sync local tags from remote - configName := fmt.Sprintf("remote.%s.fetch", mirrorModel.GetRemoteName()) - if stdout, _, err := git.NewCommand(ctx, "config"). - AddOptionValues("--add", configName, `+refs/tags/*:refs/tags/*`). - RunStdString(&git.RunOpts{Dir: repoPath}); err != nil { - log.Error("MigrateRepositoryGitData(git config --add +refs/tags/*:refs/tags/*) in %v: Stdout: %s\nError: %v", repo, stdout, err) - return repo, fmt.Errorf("error in MigrateRepositoryGitData(git config --add +refs/tags/*:refs/tags/*): %w", err) - } - } else { - if err = repo_module.UpdateRepoSize(ctx, repo); err != nil { - log.Error("Failed to update size for repository: %v", err) - } - if repo, err = repo_module.CleanUpMigrateInfo(ctx, repo); err != nil { - return nil, err - } - } - - return repo, committer.Commit() -} - -// SyncReleasesWithTags synchronizes release table with repository tags -func SyncReleasesWithTags(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { - log.Debug("SyncReleasesWithTags: in Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) - - // optimized procedure for pull-mirrors which saves a lot of time (in - // particular for repos with many tags). - if repo.IsMirror { - return pullMirrorReleaseSync(ctx, repo, gitRepo) - } - - existingRelTags := make(container.Set[string]) - opts := repo_model.FindReleasesOptions{ - IncludeDrafts: true, - IncludeTags: true, - ListOptions: db.ListOptions{PageSize: 50}, - RepoID: repo.ID, - } - for page := 1; ; page++ { - opts.Page = page - rels, err := db.Find[repo_model.Release](gitRepo.Ctx, opts) - if err != nil { - return fmt.Errorf("unable to GetReleasesByRepoID in Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) - } - if len(rels) == 0 { - break - } - for _, rel := range rels { - if rel.IsDraft { - continue - } - commitID, err := gitRepo.GetTagCommitID(rel.TagName) - if err != nil && !git.IsErrNotExist(err) { - return fmt.Errorf("unable to GetTagCommitID for %q in Repo[%d:%s/%s]: %w", rel.TagName, repo.ID, repo.OwnerName, repo.Name, err) - } - if git.IsErrNotExist(err) || commitID != rel.Sha1 { - if err := repo_model.PushUpdateDeleteTag(ctx, repo, rel.TagName); err != nil { - return fmt.Errorf("unable to PushUpdateDeleteTag: %q in Repo[%d:%s/%s]: %w", rel.TagName, repo.ID, repo.OwnerName, repo.Name, err) - } - } else { - existingRelTags.Add(strings.ToLower(rel.TagName)) - } - } - } - - _, err := gitRepo.WalkReferences(git.ObjectTag, 0, 0, func(sha1, refname string) error { - tagName := strings.TrimPrefix(refname, git.TagPrefix) - if existingRelTags.Contains(strings.ToLower(tagName)) { - return nil - } - - if err := PushUpdateAddTag(ctx, repo, gitRepo, tagName, sha1, refname); err != nil { - return fmt.Errorf("unable to PushUpdateAddTag: %q to Repo[%d:%s/%s]: %w", tagName, repo.ID, repo.OwnerName, repo.Name, err) - } - - return nil - }) - return err -} - -// PushUpdateAddTag must be called for any push actions to add tag -func PushUpdateAddTag(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, tagName, sha1, refname string) error { - tag, err := gitRepo.GetTagWithID(sha1, tagName) - if err != nil { - return fmt.Errorf("unable to GetTag: %w", err) - } - commit, err := tag.Commit(gitRepo) - if err != nil { - return fmt.Errorf("unable to get tag Commit: %w", err) - } - - sig := tag.Tagger - if sig == nil { - sig = commit.Author - } - if sig == nil { - sig = commit.Committer - } - - var author *user_model.User - createdAt := time.Unix(1, 0) - - if sig != nil { - author, err = user_model.GetUserByEmail(ctx, sig.Email) - if err != nil && !user_model.IsErrUserNotExist(err) { - return fmt.Errorf("unable to GetUserByEmail for %q: %w", sig.Email, err) - } - createdAt = sig.When - } - - commitsCount, err := commit.CommitsCount() - if err != nil { - return fmt.Errorf("unable to get CommitsCount: %w", err) - } - - rel := repo_model.Release{ - RepoID: repo.ID, - TagName: tagName, - LowerTagName: strings.ToLower(tagName), - Sha1: commit.ID.String(), - NumCommits: commitsCount, - CreatedUnix: timeutil.TimeStamp(createdAt.Unix()), - IsTag: true, - } - if author != nil { - rel.PublisherID = author.ID - } - - return repo_model.SaveOrUpdateTag(ctx, repo, &rel) -} - -func calcSync(destTags []*git.Tag, dbTags []*repo_model.Release) ([]*git.Tag, []int64, []*git.Tag) { - destTagMap := make(map[string]*git.Tag) - for _, tag := range destTags { - destTagMap[tag.Name] = tag - } - dbTagMap := make(map[string]*repo_model.Release) - for _, rel := range dbTags { - dbTagMap[rel.TagName] = rel - } - - inserted := make([]*git.Tag, 0, 10) - updated := make([]*git.Tag, 0, 10) - for _, tag := range destTags { - rel := dbTagMap[tag.Name] - if rel == nil { - inserted = append(inserted, tag) - } else if rel.Sha1 != tag.Object.String() { - updated = append(updated, tag) - } - } - deleted := make([]int64, 0, 10) - for _, tag := range dbTags { - if destTagMap[tag.TagName] == nil && tag.IsTag { - deleted = append(deleted, tag.ID) - } - } - return inserted, deleted, updated -} - -// pullMirrorReleaseSync is a pull-mirror specific tag<->release table -// synchronization which overwrites all Releases from the repository tags. This -// can be relied on since a pull-mirror is always identical to its -// upstream. Hence, after each sync we want the pull-mirror release set to be -// identical to the upstream tag set. This is much more efficient for -// repositories like https://github.com/vim/vim (with over 13000 tags). -func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { - log.Trace("pullMirrorReleaseSync: rebuilding releases for pull-mirror Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) - tags, numTags, err := gitRepo.GetTagInfos(0, 0) - if err != nil { - return fmt.Errorf("unable to GetTagInfos in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) - } - err = db.WithTx(ctx, func(ctx context.Context) error { - dbReleases, err := db.Find[repo_model.Release](ctx, repo_model.FindReleasesOptions{ - RepoID: repo.ID, - }) - if err != nil { - return fmt.Errorf("unable to FindReleases in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) - } - - inserts, deletes, updates := calcSync(tags, dbReleases) - // - // make release set identical to upstream tags - // - for _, tag := range inserts { - release := repo_model.Release{ - RepoID: repo.ID, - TagName: tag.Name, - LowerTagName: strings.ToLower(tag.Name), - Sha1: tag.Object.String(), - // NOTE: ignored, since NumCommits are unused - // for pull-mirrors (only relevant when - // displaying releases, IsTag: false) - NumCommits: -1, - CreatedUnix: timeutil.TimeStamp(tag.Tagger.When.Unix()), - IsTag: true, - } - if err := db.Insert(ctx, release); err != nil { - return fmt.Errorf("unable insert tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err) - } - } - - // only delete tags releases - if _, err := db.GetEngine(ctx).Where("repo_id=?", repo.ID). - In("id", deletes).Delete(&repo_model.Release{}); err != nil { - return fmt.Errorf("unable to delete tags for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) - } - - for _, tag := range updates { - if _, err := db.GetEngine(ctx).Where("repo_id = ? AND lower_tag_name = ?", repo.ID, strings.ToLower(tag.Name)). - Cols("sha1"). - Update(&repo_model.Release{ - Sha1: tag.Object.String(), - }); err != nil { - return fmt.Errorf("unable to update tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err) - } - } - return nil - }) - if err != nil { - return fmt.Errorf("unable to rebuild release table for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) - } - - log.Trace("pullMirrorReleaseSync: done rebuilding %d releases", numTags) - return nil -} diff --git a/tests/integration/mirror_pull_test.go b/tests/integration/mirror_pull_test.go index bc0c7599a4c39..1e0edd9a2dd07 100644 --- a/tests/integration/mirror_pull_test.go +++ b/tests/integration/mirror_pull_test.go @@ -13,6 +13,7 @@ import ( user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/migration" + "code.gitea.io/gitea/modules/repository" mirror_service "code.gitea.io/gitea/services/mirror" release_service "code.gitea.io/gitea/services/release" repo_service "code.gitea.io/gitea/services/repository" @@ -50,7 +51,7 @@ func TestMirrorPull(t *testing.T) { ctx := context.Background() - mirror, err := repo_service.MigrateRepositoryGitData(ctx, user, mirrorRepo, opts, nil) + mirror, err := repository.MigrateRepositoryGitData(ctx, user, mirrorRepo, opts, nil) assert.NoError(t, err) gitRepo, err := git.OpenRepository(git.DefaultContext, repoPath) From 8c76856d871b2805bc9364ddba2456277350b457 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 17 Jan 2024 10:04:22 +0800 Subject: [PATCH 4/7] Add unit test for new introduced function --- modules/repository/repo_test.go | 74 +++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 modules/repository/repo_test.go diff --git a/modules/repository/repo_test.go b/modules/repository/repo_test.go new file mode 100644 index 0000000000000..4c62b819d612d --- /dev/null +++ b/modules/repository/repo_test.go @@ -0,0 +1,74 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package repository + +import ( + "testing" + + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/git" + + "github.com/stretchr/testify/assert" +) + +func Test_calcSync(t *testing.T) { + gitTags := []*git.Tag{ + /*{ + Name: "v0.1.0-beta", //deleted tag + Object: git.MustIDFromString(""), + }, + { + Name: "v0.1.1-beta", //deleted tag but release should not be deleted because it's a release + Object: git.MustIDFromString(""), + }, + */ + { + Name: "v1.0.0", // keep as before + Object: git.MustIDFromString("1006e6e13c73ad3d9e2d5682ad266b5016523485"), + }, + { + Name: "v1.1.0", // retagged with new commit id + Object: git.MustIDFromString("bbdb7df30248e7d4a26a909c8d2598a152e13868"), + }, + { + Name: "v1.2.0", // new tag + Object: git.MustIDFromString("a5147145e2f24d89fd6d2a87826384cc1d253267"), + }, + } + + dbReleases := []*repo_model.Release{ + { + ID: 1, + TagName: "v0.1.0-beta", + Sha1: "244758d7da8dd1d9e0727e8cb7704ed4ba9a17c3", + IsTag: true, + }, + { + ID: 2, + TagName: "v0.1.1-beta", + Sha1: "244758d7da8dd1d9e0727e8cb7704ed4ba9a17c3", + IsTag: false, + }, + { + ID: 3, + TagName: "v1.0.0", + Sha1: "1006e6e13c73ad3d9e2d5682ad266b5016523485", + }, + { + ID: 4, + TagName: "v1.1.0", + Sha1: "53ab18dcecf4152b58328d1f47429510eb414d50", + }, + } + + inserts, deletes, updates := calcSync(gitTags, dbReleases) + assert.EqualValues(t, 1, len(inserts), "inserts") + assert.EqualValues(t, *gitTags[2], *inserts[0], "inserts equal") + + assert.EqualValues(t, 1, len(deletes), "deletes") + assert.EqualValues(t, 1, deletes[0], "deletes equal") + + assert.EqualValues(t, 1, len(updates), "updates") + assert.EqualValues(t, *gitTags[1], *updates[0], "updates equal") +} From 578c65b4f4ed943ed7cdb0978b929248a5381ec0 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 17 Jan 2024 10:10:22 +0800 Subject: [PATCH 5/7] use a simple struct to reduce possible memory --- modules/repository/repo.go | 18 +++++++++++++++--- modules/repository/repo_test.go | 3 +-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/modules/repository/repo.go b/modules/repository/repo.go index 95e6b1aadb6df..065983af3251b 100644 --- a/modules/repository/repo.go +++ b/modules/repository/repo.go @@ -492,6 +492,18 @@ func StoreMissingLfsObjectsInRepository(ctx context.Context, repo *repo_model.Re return nil } +// shortRelease to reduce load memory, this struct can replace repo_model.Release +type shortRelease struct { + ID int64 + TagName string + Sha1 string + IsTag bool +} + +func (shortRelease) TableName() string { + return "release" +} + // pullMirrorReleaseSync is a pull-mirror specific tag<->release table // synchronization which overwrites all Releases from the repository tags. This // can be relied on since a pull-mirror is always identical to its @@ -505,7 +517,7 @@ func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, git return fmt.Errorf("unable to GetTagInfos in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) } err = db.WithTx(ctx, func(ctx context.Context) error { - dbReleases, err := db.Find[repo_model.Release](ctx, repo_model.FindReleasesOptions{ + dbReleases, err := db.Find[shortRelease](ctx, repo_model.FindReleasesOptions{ RepoID: repo.ID, }) if err != nil { @@ -562,12 +574,12 @@ func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, git return nil } -func calcSync(destTags []*git.Tag, dbTags []*repo_model.Release) ([]*git.Tag, []int64, []*git.Tag) { +func calcSync(destTags []*git.Tag, dbTags []*shortRelease) ([]*git.Tag, []int64, []*git.Tag) { destTagMap := make(map[string]*git.Tag) for _, tag := range destTags { destTagMap[tag.Name] = tag } - dbTagMap := make(map[string]*repo_model.Release) + dbTagMap := make(map[string]*shortRelease) for _, rel := range dbTags { dbTagMap[rel.TagName] = rel } diff --git a/modules/repository/repo_test.go b/modules/repository/repo_test.go index 4c62b819d612d..74cbdfcd189bc 100644 --- a/modules/repository/repo_test.go +++ b/modules/repository/repo_test.go @@ -6,7 +6,6 @@ package repository import ( "testing" - repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" "github.com/stretchr/testify/assert" @@ -37,7 +36,7 @@ func Test_calcSync(t *testing.T) { }, } - dbReleases := []*repo_model.Release{ + dbReleases := []*shortRelease{ { ID: 1, TagName: "v0.1.0-beta", From 6d7d21d48350fd33e82428dbb60214f0ada509ac Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 17 Jan 2024 11:07:58 +0800 Subject: [PATCH 6/7] Fix bug --- modules/repository/repo.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/repository/repo.go b/modules/repository/repo.go index 065983af3251b..0b05a38e99d6d 100644 --- a/modules/repository/repo.go +++ b/modules/repository/repo.go @@ -518,7 +518,9 @@ func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, git } err = db.WithTx(ctx, func(ctx context.Context) error { dbReleases, err := db.Find[shortRelease](ctx, repo_model.FindReleasesOptions{ - RepoID: repo.ID, + RepoID: repo.ID, + IncludeDrafts: true, + IncludeTags: true, }) if err != nil { return fmt.Errorf("unable to FindReleases in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) From 9ee6782e9adcb94018acead22e3f74293b08f1f6 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Fri, 26 Jan 2024 13:53:17 +0800 Subject: [PATCH 7/7] Follow 6543's suggestion --- modules/repository/repo_test.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/modules/repository/repo_test.go b/modules/repository/repo_test.go index 74cbdfcd189bc..68980f92f9450 100644 --- a/modules/repository/repo_test.go +++ b/modules/repository/repo_test.go @@ -62,12 +62,15 @@ func Test_calcSync(t *testing.T) { } inserts, deletes, updates := calcSync(gitTags, dbReleases) - assert.EqualValues(t, 1, len(inserts), "inserts") - assert.EqualValues(t, *gitTags[2], *inserts[0], "inserts equal") + if assert.EqualValues(t, 1, len(inserts), "inserts") { + assert.EqualValues(t, *gitTags[2], *inserts[0], "inserts equal") + } - assert.EqualValues(t, 1, len(deletes), "deletes") - assert.EqualValues(t, 1, deletes[0], "deletes equal") + if assert.EqualValues(t, 1, len(deletes), "deletes") { + assert.EqualValues(t, 1, deletes[0], "deletes equal") + } - assert.EqualValues(t, 1, len(updates), "updates") - assert.EqualValues(t, *gitTags[1], *updates[0], "updates equal") + if assert.EqualValues(t, 1, len(updates), "updates") { + assert.EqualValues(t, *gitTags[1], *updates[0], "updates equal") + } }