2017-10-26 23:10:54 -07:00
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package models
import (
2018-02-05 00:39:51 -08:00
"fmt"
2017-10-26 23:10:54 -07:00
"strconv"
"strings"
"code.gitea.io/gitea/modules/base"
2019-03-27 17:33:00 +08:00
"code.gitea.io/gitea/modules/git"
2017-10-26 23:10:54 -07:00
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
2018-02-05 10:29:17 -08:00
"github.com/ethantkoenig/rupture"
2017-10-26 23:10:54 -07:00
)
// RepoIndexerStatus status of a repo's entry in the repo indexer, stored as a
// database row through xorm.
// For now, implicitly refers to default branch
type RepoIndexerStatus struct {
// ID is the auto-incremented primary key of the status row.
ID int64 ` xorm:"pk autoincr" `
// RepoID is the ID of the repository this status describes (indexed column).
RepoID int64 ` xorm:"INDEX" `
// CommitSha is the commit the index was last synchronized with. An empty
// value is treated by this file as "never indexed": the status row is
// inserted rather than updated, and the repository is indexed from scratch.
CommitSha string ` xorm:"VARCHAR(40)" `
}
// getIndexerStatus makes sure repo.IndexerStatus is populated. If it is
// already set nothing happens; otherwise the status row for this repository
// is looked up in the database and cached on the receiver. When no row
// exists, the cached status has an empty CommitSha.
func (repo *Repository) getIndexerStatus() error {
	if repo.IndexerStatus == nil {
		loaded := &RepoIndexerStatus{RepoID: repo.ID}
		found, err := x.Get(loaded)
		switch {
		case err != nil:
			return err
		case !found:
			// No row yet: an empty sha marks the repo as never indexed.
			loaded.CommitSha = ""
		}
		repo.IndexerStatus = loaded
	}
	return nil
}
// updateIndexerStatus records sha as the commit the repository's index is now
// in sync with. The status row is inserted when the previously known sha was
// empty, and otherwise only its commit_sha column is updated.
func (repo *Repository) updateIndexerStatus(sha string) error {
	if err := repo.getIndexerStatus(); err != nil {
		return err
	}

	status := repo.IndexerStatus
	// Decide between insert and update before the old sha is overwritten.
	firstTime := len(status.CommitSha) == 0
	status.CommitSha = sha

	var err error
	if firstTime {
		_, err = x.Insert(status)
	} else {
		_, err = x.ID(status.ID).Cols("commit_sha").Update(status)
	}
	return err
}
// repoIndexerOperation is one unit of work sent through
// repoIndexerOperationQueue.
type repoIndexerOperation struct {
// repo is the repository the operation applies to.
repo * Repository
// deleted selects the action: true removes the repository's entries from
// the indexer, false brings the repository's entries up to date.
deleted bool
}
// repoIndexerOperationQueue carries pending operations to the goroutine
// started by InitRepoIndexer. It is only created when the repo indexer is
// enabled in the settings.
var repoIndexerOperationQueue chan repoIndexerOperation
// InitRepoIndexer initialize the repo indexer
func InitRepoIndexer ( ) {
if ! setting . Indexer . RepoIndexerEnabled {
return
}
repoIndexerOperationQueue = make ( chan repoIndexerOperation , setting . Indexer . UpdateQueueLength )
2018-01-14 09:34:41 -08:00
indexer . InitRepoIndexer ( populateRepoIndexerAsynchronously )
2017-10-26 23:10:54 -07:00
go processRepoIndexerOperationQueue ( )
}
2018-01-14 09:34:41 -08:00
// populateRepoIndexerAsynchronously asynchronously populates the repo indexer
// with pre-existing data. This should only be run when the indexer is created
// for the first time.
func populateRepoIndexerAsynchronously ( ) error {
exist , err := x . Table ( "repository" ) . Exist ( )
if err != nil {
return err
} else if ! exist {
return nil
}
2018-01-17 13:51:07 -08:00
// if there is any existing repo indexer metadata in the DB, delete it
// since we are starting afresh. Also, xorm requires deletes to have a
// condition, and we want to delete everything, thus 1=1.
if _ , err := x . Where ( "1=1" ) . Delete ( new ( RepoIndexerStatus ) ) ; err != nil {
return err
}
2018-01-14 09:34:41 -08:00
var maxRepoID int64
if _ , err = x . Select ( "MAX(id)" ) . Table ( "repository" ) . Get ( & maxRepoID ) ; err != nil {
return err
}
go populateRepoIndexer ( maxRepoID )
return nil
}
// populateRepoIndexer populate the repo indexer with pre-existing data. This
// should only be run when the indexer is created for the first time.
func populateRepoIndexer ( maxRepoID int64 ) {
log . Info ( "Populating the repo indexer with existing repositories" )
// start with the maximum existing repo ID and work backwards, so that we
// don't include repos that are created after gitea starts; such repos will
// already be added to the indexer, and we don't need to add them again.
for maxRepoID > 0 {
repos := make ( [ ] * Repository , 0 , RepositoryListDefaultPageSize )
err := x . Where ( "id <= ?" , maxRepoID ) .
OrderBy ( "id DESC" ) .
Limit ( RepositoryListDefaultPageSize ) .
Find ( & repos )
2017-10-26 23:10:54 -07:00
if err != nil {
2019-04-02 08:48:31 +01:00
log . Error ( "populateRepoIndexer: %v" , err )
2018-01-14 09:34:41 -08:00
return
2017-10-26 23:10:54 -07:00
} else if len ( repos ) == 0 {
2018-01-14 09:34:41 -08:00
break
2017-10-26 23:10:54 -07:00
}
for _ , repo := range repos {
2018-01-14 09:34:41 -08:00
repoIndexerOperationQueue <- repoIndexerOperation {
repo : repo ,
deleted : false ,
2017-10-26 23:10:54 -07:00
}
2018-01-14 09:34:41 -08:00
maxRepoID = repo . ID - 1
2017-10-26 23:10:54 -07:00
}
}
2018-01-14 09:34:41 -08:00
log . Info ( "Done populating the repo indexer with existing repositories" )
2017-10-26 23:10:54 -07:00
}
func updateRepoIndexer ( repo * Repository ) error {
2018-02-05 00:39:51 -08:00
sha , err := getDefaultBranchSha ( repo )
if err != nil {
return err
}
changes , err := getRepoChanges ( repo , sha )
2017-10-26 23:10:54 -07:00
if err != nil {
return err
} else if changes == nil {
return nil
}
batch := indexer . RepoIndexerBatch ( )
2018-02-05 00:39:51 -08:00
for _ , update := range changes . Updates {
if err := addUpdate ( update , repo , batch ) ; err != nil {
2017-10-26 23:10:54 -07:00
return err
}
}
2018-02-05 00:39:51 -08:00
for _ , filename := range changes . RemovedFilenames {
2017-10-26 23:10:54 -07:00
if err := addDelete ( filename , repo , batch ) ; err != nil {
return err
}
}
if err = batch . Flush ( ) ; err != nil {
return err
}
2018-02-05 00:39:51 -08:00
return repo . updateIndexerStatus ( sha )
2017-10-26 23:10:54 -07:00
}
// repoChanges changes (file additions/updates/removals) to a repo
type repoChanges struct {
2018-02-05 00:39:51 -08:00
Updates [ ] fileUpdate
RemovedFilenames [ ] string
2017-10-26 23:10:54 -07:00
}
2018-02-05 00:39:51 -08:00
// fileUpdate identifies one file to (re)index: Filename is the file's path as
// reported by git ls-tree and BlobSha is the object ID of its content.
type fileUpdate struct {
	Filename, BlobSha string
}
2017-10-26 23:10:54 -07:00
2018-02-05 00:39:51 -08:00
// getDefaultBranchSha returns the commit sha the default branch of repo
// currently points to, as reported by `git show-ref -s` run in the
// repository's directory. Any error from the git command is returned as is.
func getDefaultBranchSha(repo *Repository) (string, error) {
	// Qualify the branch name with refs/heads/. show-ref matches a pattern
	// against the trailing path components of every ref, so a bare branch name
	// would also match a tag or remote-tracking ref of the same name and the
	// output would then contain several shas instead of one.
	ref := "refs/heads/" + repo.DefaultBranch
	stdout, err := git.NewCommand("show-ref", "-s", ref).RunInDir(repo.RepoPath())
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(stdout), nil
}
// getRepoChanges returns changes to repo since last indexer update
func getRepoChanges ( repo * Repository , revision string ) ( * repoChanges , error ) {
if err := repo . getIndexerStatus ( ) ; err != nil {
2017-10-26 23:10:54 -07:00
return nil , err
}
if len ( repo . IndexerStatus . CommitSha ) == 0 {
2018-02-05 00:39:51 -08:00
return genesisChanges ( repo , revision )
2017-10-26 23:10:54 -07:00
}
2018-02-05 00:39:51 -08:00
return nonGenesisChanges ( repo , revision )
2017-10-26 23:10:54 -07:00
}
2018-02-05 10:29:17 -08:00
func addUpdate ( update fileUpdate , repo * Repository , batch rupture . FlushingBatch ) error {
2018-02-05 00:39:51 -08:00
stdout , err := git . NewCommand ( "cat-file" , "-s" , update . BlobSha ) .
RunInDir ( repo . RepoPath ( ) )
if err != nil {
2017-10-26 23:10:54 -07:00
return err
2018-02-05 00:39:51 -08:00
}
if size , err := strconv . Atoi ( strings . TrimSpace ( stdout ) ) ; err != nil {
return fmt . Errorf ( "Misformatted git cat-file output: %v" , err )
} else if int64 ( size ) > setting . Indexer . MaxIndexerFileSize {
2017-12-06 23:18:04 -08:00
return nil
2017-10-26 23:10:54 -07:00
}
2018-02-05 00:39:51 -08:00
fileContents , err := git . NewCommand ( "cat-file" , "blob" , update . BlobSha ) .
RunInDirBytes ( repo . RepoPath ( ) )
2017-10-26 23:10:54 -07:00
if err != nil {
return err
} else if ! base . IsTextFile ( fileContents ) {
return nil
}
2018-02-05 10:29:17 -08:00
indexerUpdate := indexer . RepoIndexerUpdate {
2018-02-05 00:39:51 -08:00
Filepath : update . Filename ,
2017-10-26 23:10:54 -07:00
Op : indexer . RepoIndexerOpUpdate ,
Data : & indexer . RepoIndexerData {
RepoID : repo . ID ,
Content : string ( fileContents ) ,
} ,
2018-02-05 10:29:17 -08:00
}
return indexerUpdate . AddToFlushingBatch ( batch )
2017-10-26 23:10:54 -07:00
}
2018-02-05 10:29:17 -08:00
func addDelete ( filename string , repo * Repository , batch rupture . FlushingBatch ) error {
indexerUpdate := indexer . RepoIndexerUpdate {
2017-10-26 23:10:54 -07:00
Filepath : filename ,
Op : indexer . RepoIndexerOpDelete ,
Data : & indexer . RepoIndexerData {
RepoID : repo . ID ,
} ,
2018-02-05 10:29:17 -08:00
}
return indexerUpdate . AddToFlushingBatch ( batch )
2017-10-26 23:10:54 -07:00
}
2018-02-05 00:39:51 -08:00
// parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command
2018-02-10 10:19:26 -08:00
func parseGitLsTreeOutput ( stdout [ ] byte ) ( [ ] fileUpdate , error ) {
entries , err := git . ParseTreeEntries ( stdout )
if err != nil {
return nil , err
}
updates := make ( [ ] fileUpdate , len ( entries ) )
for i , entry := range entries {
updates [ i ] = fileUpdate {
Filename : entry . Name ( ) ,
BlobSha : entry . ID . String ( ) ,
2017-10-26 23:10:54 -07:00
}
2018-02-05 00:39:51 -08:00
}
return updates , nil
}
// genesisChanges get changes to add repo to the indexer for the first time
func genesisChanges ( repo * Repository , revision string ) ( * repoChanges , error ) {
var changes repoChanges
stdout , err := git . NewCommand ( "ls-tree" , "--full-tree" , "-r" , revision ) .
2018-02-10 10:19:26 -08:00
RunInDirBytes ( repo . RepoPath ( ) )
2018-02-05 00:39:51 -08:00
if err != nil {
return nil , err
2017-10-26 23:10:54 -07:00
}
2018-02-05 00:39:51 -08:00
changes . Updates , err = parseGitLsTreeOutput ( stdout )
return & changes , err
2017-10-26 23:10:54 -07:00
}
// nonGenesisChanges get changes since the previous indexer update
2018-02-05 00:39:51 -08:00
func nonGenesisChanges ( repo * Repository , revision string ) ( * repoChanges , error ) {
2017-10-26 23:10:54 -07:00
diffCmd := git . NewCommand ( "diff" , "--name-status" ,
2018-02-05 00:39:51 -08:00
repo . IndexerStatus . CommitSha , revision )
stdout , err := diffCmd . RunInDir ( repo . RepoPath ( ) )
2017-10-26 23:10:54 -07:00
if err != nil {
// previous commit sha may have been removed by a force push, so
// try rebuilding from scratch
2018-02-05 00:39:51 -08:00
log . Warn ( "git diff: %v" , err )
2017-10-26 23:10:54 -07:00
if err = indexer . DeleteRepoFromIndexer ( repo . ID ) ; err != nil {
return nil , err
}
2018-02-05 00:39:51 -08:00
return genesisChanges ( repo , revision )
2017-10-26 23:10:54 -07:00
}
var changes repoChanges
2018-02-05 00:39:51 -08:00
updatedFilenames := make ( [ ] string , 0 , 10 )
2017-10-26 23:10:54 -07:00
for _ , line := range strings . Split ( stdout , "\n" ) {
line = strings . TrimSpace ( line )
if len ( line ) == 0 {
continue
}
filename := strings . TrimSpace ( line [ 1 : ] )
if len ( filename ) == 0 {
continue
} else if filename [ 0 ] == '"' {
filename , err = strconv . Unquote ( filename )
if err != nil {
return nil , err
}
}
switch status := line [ 0 ] ; status {
case 'M' , 'A' :
2018-02-05 00:39:51 -08:00
updatedFilenames = append ( updatedFilenames , filename )
2017-10-26 23:10:54 -07:00
case 'D' :
2018-02-05 00:39:51 -08:00
changes . RemovedFilenames = append ( changes . RemovedFilenames , filename )
2017-10-26 23:10:54 -07:00
default :
log . Warn ( "Unrecognized status: %c (line=%s)" , status , line )
}
}
2018-02-05 00:39:51 -08:00
cmd := git . NewCommand ( "ls-tree" , "--full-tree" , revision , "--" )
cmd . AddArguments ( updatedFilenames ... )
2018-02-10 10:19:26 -08:00
lsTreeStdout , err := cmd . RunInDirBytes ( repo . RepoPath ( ) )
2017-10-26 23:10:54 -07:00
if err != nil {
2018-02-05 00:39:51 -08:00
return nil , err
2017-10-26 23:10:54 -07:00
}
2018-02-10 10:19:26 -08:00
changes . Updates , err = parseGitLsTreeOutput ( lsTreeStdout )
2018-02-05 00:39:51 -08:00
return & changes , err
2017-10-26 23:10:54 -07:00
}
func processRepoIndexerOperationQueue ( ) {
for {
op := <- repoIndexerOperationQueue
if op . deleted {
if err := indexer . DeleteRepoFromIndexer ( op . repo . ID ) ; err != nil {
2019-04-02 08:48:31 +01:00
log . Error ( "DeleteRepoFromIndexer: %v" , err )
2017-10-26 23:10:54 -07:00
}
} else {
if err := updateRepoIndexer ( op . repo ) ; err != nil {
2019-04-02 08:48:31 +01:00
log . Error ( "updateRepoIndexer: %v" , err )
2017-10-26 23:10:54 -07:00
}
}
}
}
// DeleteRepoFromIndexer queues the removal of all of a repository's entries
// from the indexer.
func DeleteRepoFromIndexer(repo *Repository) {
	removal := repoIndexerOperation{deleted: true, repo: repo}
	addOperationToQueue(removal)
}
// UpdateRepoIndexer queues an update of a repository's entries in the indexer.
func UpdateRepoIndexer(repo *Repository) {
	refresh := repoIndexerOperation{deleted: false, repo: repo}
	addOperationToQueue(refresh)
}
// addOperationToQueue hands op to the indexer queue without blocking the
// caller. It does nothing when the repo indexer is disabled. If the queue
// cannot accept the operation immediately, the send is moved to a new
// goroutine that waits until it can.
func addOperationToQueue(op repoIndexerOperation) {
	if setting.Indexer.RepoIndexerEnabled {
		select {
		case repoIndexerOperationQueue <- op:
			// Accepted right away.
		default:
			go func(pending repoIndexerOperation) {
				repoIndexerOperationQueue <- pending
			}(op)
		}
	}
}