2022-12-15 23:44:16 +03:00
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package repository
import (
"context"
2023-01-16 22:50:53 +03:00
"errors"
2022-12-15 23:44:16 +03:00
"fmt"
"time"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
2023-01-16 22:50:53 +03:00
"code.gitea.io/gitea/modules/setting"
2023-07-26 10:02:53 +03:00
"code.gitea.io/gitea/modules/timeutil"
2022-12-15 23:44:16 +03:00
)
2023-01-16 22:50:53 +03:00
// GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function
type GarbageCollectLFSMetaObjectsOptions struct {
	// LogDetail receives the progress and summary messages. When nil,
	// GarbageCollectLFSMetaObjects substitutes log.Debug.
	LogDetail func(format string, v ...any)

	// AutoFix selects between reporting only (false) and actually removing
	// orphaned meta objects (true).
	AutoFix bool

	// OlderThan is converted to a unix timestamp and passed as the OlderThan
	// filter when iterating over a repository's meta objects.
	OlderThan time.Time

	// UpdatedLessRecentlyThan is converted to a unix timestamp and passed as
	// the UpdatedLessRecentlyThan filter when iterating over meta objects.
	UpdatedLessRecentlyThan time.Time

	// NumberToCheckPerRepo limits how many meta objects are examined in one
	// repository. Values <= 0 disable the limit.
	NumberToCheckPerRepo int64

	// ProportionToCheckPerRepo is multiplied by a repository's meta object
	// count; if the result exceeds a non-zero NumberToCheckPerRepo it is used
	// as the limit instead.
	ProportionToCheckPerRepo float64
}
// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
func GarbageCollectLFSMetaObjects ( ctx context . Context , opts GarbageCollectLFSMetaObjectsOptions ) error {
2022-12-15 23:44:16 +03:00
log . Trace ( "Doing: GarbageCollectLFSMetaObjects" )
2023-01-16 22:50:53 +03:00
defer log . Trace ( "Finished: GarbageCollectLFSMetaObjects" )
2022-12-15 23:44:16 +03:00
2023-07-06 19:52:41 +03:00
if opts . LogDetail == nil {
opts . LogDetail = log . Debug
}
2023-01-16 22:50:53 +03:00
if ! setting . LFS . StartServer {
2023-07-06 19:52:41 +03:00
opts . LogDetail ( "LFS support is disabled" )
2023-01-16 22:50:53 +03:00
return nil
2022-12-15 23:44:16 +03:00
}
2023-01-16 22:50:53 +03:00
return git_model . IterateRepositoryIDsWithLFSMetaObjects ( ctx , func ( ctx context . Context , repoID , count int64 ) error {
repo , err := repo_model . GetRepositoryByID ( ctx , repoID )
if err != nil {
return err
}
if newMinimum := int64 ( float64 ( count ) * opts . ProportionToCheckPerRepo ) ; newMinimum > opts . NumberToCheckPerRepo && opts . NumberToCheckPerRepo != 0 {
opts . NumberToCheckPerRepo = newMinimum
}
return GarbageCollectLFSMetaObjectsForRepo ( ctx , repo , opts )
} )
2022-12-15 23:44:16 +03:00
}
2023-01-16 22:50:53 +03:00
// GarbageCollectLFSMetaObjectsForRepo garbage collects LFS objects for a specific repository
func GarbageCollectLFSMetaObjectsForRepo ( ctx context . Context , repo * repo_model . Repository , opts GarbageCollectLFSMetaObjectsOptions ) error {
2023-07-06 19:52:41 +03:00
opts . LogDetail ( "Checking %-v" , repo )
2023-01-16 22:50:53 +03:00
total , orphaned , collected , deleted := int64 ( 0 ) , 0 , 0 , 0
2023-07-06 19:52:41 +03:00
defer func ( ) {
if orphaned == 0 {
opts . LogDetail ( "Found %d total LFSMetaObjects in %-v" , total , repo )
} else if ! opts . AutoFix {
opts . LogDetail ( "Found %d/%d orphaned LFSMetaObjects in %-v" , orphaned , total , repo )
} else {
opts . LogDetail ( "Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage." , collected , orphaned , total , repo , deleted )
}
} ( )
2022-12-15 23:44:16 +03:00
gitRepo , err := git . OpenRepository ( ctx , repo . RepoPath ( ) )
if err != nil {
log . Error ( "Unable to open git repository %-v: %v" , repo , err )
return err
}
defer gitRepo . Close ( )
store := lfs . NewContentStore ( )
2023-01-16 22:50:53 +03:00
errStop := errors . New ( "STOPERR" )
2023-12-14 00:02:00 +03:00
objectFormat , _ := gitRepo . GetObjectFormat ( )
2022-12-15 23:44:16 +03:00
2023-01-16 22:50:53 +03:00
err = git_model . IterateLFSMetaObjectsForRepo ( ctx , repo . ID , func ( ctx context . Context , metaObject * git_model . LFSMetaObject , count int64 ) error {
if opts . NumberToCheckPerRepo > 0 && total > opts . NumberToCheckPerRepo {
return errStop
}
2022-12-15 23:44:16 +03:00
total ++
2023-12-14 00:02:00 +03:00
pointerSha := git . ComputeBlobHash ( objectFormat , [ ] byte ( metaObject . Pointer . StringContent ( ) ) )
2022-12-15 23:44:16 +03:00
if gitRepo . IsObjectExist ( pointerSha . String ( ) ) {
2023-01-16 22:50:53 +03:00
return git_model . MarkLFSMetaObject ( ctx , metaObject . ID )
2022-12-15 23:44:16 +03:00
}
orphaned ++
2023-01-16 22:50:53 +03:00
if ! opts . AutoFix {
2022-12-15 23:44:16 +03:00
return nil
}
// Non-existent pointer file
2023-01-09 06:50:54 +03:00
_ , err = git_model . RemoveLFSMetaObjectByOidFn ( ctx , repo . ID , metaObject . Oid , func ( count int64 ) error {
2022-12-15 23:44:16 +03:00
if count > 0 {
return nil
}
if err := store . Delete ( metaObject . RelativePath ( ) ) ; err != nil {
log . Error ( "Unable to remove lfs metaobject %s from store: %v" , metaObject . Oid , err )
}
deleted ++
return nil
} )
if err != nil {
return fmt . Errorf ( "unable to remove meta-object %s in %s: %w" , metaObject . Oid , repo . FullName ( ) , err )
}
collected ++
return nil
} , & git_model . IterateLFSMetaObjectsForRepoOptions {
// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
// objects.
//
// It is likely that a week is potentially excessive but it should definitely be enough that any
// unassociated LFS object is genuinely unassociated.
2023-07-26 10:02:53 +03:00
OlderThan : timeutil . TimeStamp ( opts . OlderThan . Unix ( ) ) ,
UpdatedLessRecentlyThan : timeutil . TimeStamp ( opts . UpdatedLessRecentlyThan . Unix ( ) ) ,
2023-01-16 22:50:53 +03:00
OrderByUpdated : true ,
LoopFunctionAlwaysUpdates : true ,
2022-12-15 23:44:16 +03:00
} )
2023-01-16 22:50:53 +03:00
if err == errStop {
2023-07-06 19:52:41 +03:00
opts . LogDetail ( "Processing stopped at %d total LFSMetaObjects in %-v" , total , repo )
2023-01-16 22:50:53 +03:00
return nil
} else if err != nil {
return err
}
return nil
2022-12-15 23:44:16 +03:00
}