Skip to content

Commit 35c7906

Browse files
committed
Improve generational GC and only check repos with LFSMetaObjects
Signed-off-by: Andrew Thornton <[email protected]>
1 parent 645ba23 commit 35c7906

File tree

6 files changed

+99
-46
lines changed

6 files changed

+99
-46
lines changed

custom/conf/app.example.ini

+5
Original file line numberDiff line numberDiff line change
@@ -2188,6 +2188,11 @@ ROUTER = console
21882188
;OLDER_THAN = 168h
21892189
;; Only attempt to garbage collect LFSMetaObjects that have not been attempted to be garbage collected for this long (default 3 days)
21902190
;LAST_UPDATED_MORE_THAN_AGO = 72h
2191+
;; Minimum number of stale LFSMetaObjects to check per repo. Set to `0` to always check all.
2192+
;NUMBER_TO_CHECK_PER_REPO = 100
2193+
;; Check at least this proportion of LFSMetaObjects per repo. (This may cause all stale LFSMetaObjects to be checked.)
2194+
;PROPORTION_TO_CHECK_PER_REPO = 0.6
2195+
21912196

21922197
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21932198
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

docs/content/doc/advanced/config-cheat-sheet.en-us.md

+2
Original file line numberDiff line numberDiff line change
@@ -1032,6 +1032,8 @@ Default templates for project boards:
10321032
- `SCHEDULE`: **@every 24h**: Cron syntax to set how often to check.
10331033
- `OLDER_THAN`: **168h**: Only attempt to garbage collect LFSMetaObjects older than this (default 7 days)
10341034
- `LAST_UPDATED_MORE_THAN_AGO`: **72h**: Only attempt to garbage collect LFSMetaObjects that have not been attempted to be garbage collected for this long (default 3 days)
1035+
- `NUMBER_TO_CHECK_PER_REPO`: **100**: Minimum number of stale LFSMetaObjects to check per repo. Set to `0` to always check all.
1036+
- `PROPORTION_TO_CHECK_PER_REPO`: **0.6**: Check at least this proportion of LFSMetaObjects per repo. (This may cause all stale LFSMetaObjects to be checked.)
10351037

10361038
## Git (`git`)
10371039

models/git/lfs.go

+47-6
Original file line numberDiff line numberDiff line change
@@ -335,9 +335,45 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
335335
return lfsSize, nil
336336
}
337337

338+
// IterateRepositoryIDsWithLFSMetaObjects iterates across the repositories that have LFSMetaObjects
339+
func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error {
340+
batchSize := setting.Database.IterateBufferSize
341+
sess := db.GetEngine(ctx)
342+
id := int64(0)
343+
type RepositoryCount struct {
344+
RepositoryID int64
345+
Count int64
346+
}
347+
for {
348+
counts := make([]*RepositoryCount, 0, batchSize)
349+
sess.Select("repository_id, COUNT(id) AS count").
350+
Table("lfs_meta_object").
351+
Where("repository_id > ?", id).
352+
GroupBy("repository_id").
353+
OrderBy("repository_id ASC")
354+
355+
if err := sess.Limit(batchSize, 0).Find(&counts); err != nil {
356+
return err
357+
}
358+
if len(counts) == 0 {
359+
return nil
360+
}
361+
362+
for _, count := range counts {
363+
if err := f(ctx, count.RepositoryID, count.Count); err != nil {
364+
return err
365+
}
366+
}
367+
id = counts[len(counts)-1].RepositoryID
368+
}
369+
}
370+
371+
// IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo
338372
type IterateLFSMetaObjectsForRepoOptions struct {
339-
OlderThan time.Time
340-
UpdatedLessRecentlyThan time.Time
373+
OlderThan time.Time
374+
UpdatedLessRecentlyThan time.Time
375+
OrderByUpdated bool
376+
LoopFunctionAlwaysUpdates bool
341377
}
342378

343379
// IterateLFSMetaObjectsForRepo provides an iterator for LFSMetaObjects per Repo
@@ -354,7 +390,6 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
354390

355391
for {
356392
beans := make([]*CountLFSMetaObject, 0, batchSize)
357-
// SELECT `lfs_meta_object`.*, COUNT(`l1`.id) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id = ? GROUP BY lfs_meta_object.id
358393
sess := engine.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
359394
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
360395
Where("`lfs_meta_object`.repository_id = ?", repoID)
@@ -364,16 +399,22 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
364399
if !opts.UpdatedLessRecentlyThan.IsZero() {
365400
sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
366401
}
367-
sess.And("`lfs_meta_object`.id > ?", id)
368402
sess.GroupBy("`lfs_meta_object`.id")
369-
sess.OrderBy("`lfs_meta_object`.id ASC")
403+
if opts.OrderByUpdated {
404+
sess.OrderBy("`lfs_meta_object`.updated_unix ASC")
405+
} else {
406+
sess.And("`lfs_meta_object`.id > ?", id)
407+
sess.OrderBy("`lfs_meta_object`.id ASC")
408+
}
370409
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
371410
return err
372411
}
373412
if len(beans) == 0 {
374413
return nil
375414
}
376-
start += len(beans)
415+
if !opts.LoopFunctionAlwaysUpdates {
416+
start += len(beans)
417+
}
377418

378419
for _, bean := range beans {
379420
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {

modules/doctor/lfs.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool
4141
//
4242
// It is likely that a week is potentially excessive but it should definitely be enough that any
4343
// unassociated LFS object is genuinely unassociated.
44-
OlderThan: 24 * time.Hour * 7,
44+
OlderThan: time.Now().Add(-24 * time.Hour * 7),
4545
// We don't set the UpdatedLessRecentlyThan because we want to do a full GC
4646
}); err != nil {
4747
return err

services/cron/tasks_extended.go

+10-6
Original file line numberDiff line numberDiff line change
@@ -181,10 +181,12 @@ func registerGCLFS() {
181181
}
182182
type GCLFSConfig struct {
183183
OlderThanConfig
184-
LastUpdatedMoreThanAgo time.Duration
184+
LastUpdatedMoreThanAgo time.Duration
185+
NumberToCheckPerRepo int64
186+
ProportionToCheckPerRepo float64
185187
}
186188

187-
RegisterTaskFatal("delete_old_system_notices", &GCLFSConfig{
189+
RegisterTaskFatal("gc_lfs", &GCLFSConfig{
188190
OlderThanConfig: OlderThanConfig{
189191
BaseConfig: BaseConfig{
190192
Enabled: false,
@@ -202,13 +204,15 @@ func registerGCLFS() {
202204
OlderThan: 24 * time.Hour * 7,
203205
},
204206
// Only GC things that haven't been looked at in the past 3 days
205-
LastUpdatedMoreThanAgo: 24 * time.Hour * 3,
207+
LastUpdatedMoreThanAgo: 24 * time.Hour * 3,
208+
NumberToCheckPerRepo: 100,
209+
ProportionToCheckPerRepo: 0.6,
206210
}, func(ctx context.Context, _ *user_model.User, config Config) error {
207211
gcLFSConfig := config.(*GCLFSConfig)
208212
return repo_service.GarbageCollectLFSMetaObjects(ctx, repo_service.GarbageCollectLFSMetaObjectsOptions{
209-
AutoFix: true,
210-
OlderThan: gcLFSConfig.OlderThan,
211-
LastUpdatedMoreThanAgo: gcLFSConfig.LastUpdatedMoreThanAgo,
213+
AutoFix: true,
214+
OlderThan: time.Now().Add(-gcLFSConfig.OlderThan),
215+
UpdatedLessRecentlyThan: time.Now().Add(-gcLFSConfig.LastUpdatedMoreThanAgo),
212216
})
213217
})
214218
}

services/repository/lfs.go

+34-33
Original file line numberDiff line numberDiff line change
@@ -5,27 +5,26 @@ package repository
55

66
import (
77
"context"
8+
"errors"
89
"fmt"
910
"time"
1011

11-
"code.gitea.io/gitea/models/db"
1212
git_model "code.gitea.io/gitea/models/git"
1313
repo_model "code.gitea.io/gitea/models/repo"
1414
"code.gitea.io/gitea/modules/git"
1515
"code.gitea.io/gitea/modules/lfs"
1616
"code.gitea.io/gitea/modules/log"
1717
"code.gitea.io/gitea/modules/setting"
18-
19-
"xorm.io/builder"
2018
)
2119

2220
// GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function
2321
type GarbageCollectLFSMetaObjectsOptions struct {
24-
RepoID int64
25-
Logger log.Logger
26-
AutoFix bool
27-
OlderThan time.Duration
28-
LastUpdatedMoreThanAgo time.Duration
22+
Logger log.Logger
23+
AutoFix bool
24+
OlderThan time.Time
25+
UpdatedLessRecentlyThan time.Time
26+
NumberToCheckPerRepo int64
27+
ProportionToCheckPerRepo float64
2928
}
3029

3130
// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
@@ -40,29 +39,25 @@ func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMet
4039
return nil
4140
}
4241

43-
if opts.RepoID == 0 {
44-
repo, err := repo_model.GetRepositoryByID(ctx, opts.RepoID)
42+
return git_model.IterateRepositoryIDsWithLFSMetaObjects(ctx, func(ctx context.Context, repoID, count int64) error {
43+
repo, err := repo_model.GetRepositoryByID(ctx, repoID)
4544
if err != nil {
4645
return err
4746
}
48-
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
49-
}
5047

51-
return db.Iterate(
52-
ctx,
53-
builder.And(builder.Gt{"id": 0}),
54-
func(ctx context.Context, repo *repo_model.Repository) error {
55-
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
56-
},
57-
)
48+
if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 {
49+
opts.NumberToCheckPerRepo = newMinimum
50+
}
51+
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
52+
})
5853
}
5954

6055
// GarbageCollectLFSMetaObjectsForRepo garbage collects LFS objects for a specific repository
6156
func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, opts GarbageCollectLFSMetaObjectsOptions) error {
6257
if opts.Logger != nil {
6358
opts.Logger.Info("Checking %-v", repo)
6459
}
65-
total, orphaned, collected, deleted := 0, 0, 0, 0
60+
total, orphaned, collected, deleted := int64(0), 0, 0, 0
6661
if opts.Logger != nil {
6762
defer func() {
6863
if orphaned == 0 {
@@ -83,18 +78,12 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
8378
defer gitRepo.Close()
8479

8580
store := lfs.NewContentStore()
81+
errStop := errors.New("STOPERR")
8682

87-
var olderThan time.Time
88-
var updatedLessRecentlyThan time.Time
89-
90-
if opts.OlderThan > 0 {
91-
olderThan = time.Now().Add(opts.OlderThan)
92-
}
93-
if opts.LastUpdatedMoreThanAgo > 0 {
94-
updatedLessRecentlyThan = time.Now().Add(opts.LastUpdatedMoreThanAgo)
95-
}
96-
97-
return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
83+
err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
84+
if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo {
85+
return errStop
86+
}
9887
total++
9988
pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent()))
10089

@@ -133,7 +122,19 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
133122
//
134123
// It is likely that a week is potentially excessive but it should definitely be enough that any
135124
// unassociated LFS object is genuinely unassociated.
136-
OlderThan: olderThan,
137-
UpdatedLessRecentlyThan: updatedLessRecentlyThan,
125+
OlderThan: opts.OlderThan,
126+
UpdatedLessRecentlyThan: opts.UpdatedLessRecentlyThan,
127+
OrderByUpdated: true,
128+
LoopFunctionAlwaysUpdates: true,
138129
})
130+
131+
if err == errStop {
132+
if opts.Logger != nil {
133+
opts.Logger.Info("Processing stopped at %d total LFSMetaObjects in %-v", total, repo)
134+
}
135+
return nil
136+
} else if err != nil {
137+
return err
138+
}
139+
return nil
139140
}

0 commit comments

Comments
 (0)