// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package code
import (
"strconv"
"strings"
2021-12-10 04:27:50 +03:00
repo_model "code.gitea.io/gitea/models/repo"
2019-12-23 15:31:16 +03:00
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
)
// fileUpdate describes one file that has to be (re)indexed.
type fileUpdate struct {
	// Filename is the path of the file as reported by git.
	Filename string
	// BlobSha is the string form of the git object ID of the file's blob.
	BlobSha string
	// Size is the size taken from the git tree entry.
	Size int64
	// Sized is set to true by parseGitLsTreeOutput when Size is filled in
	// from the tree entry.
	Sized bool
}
// repoChanges collects the changes (file additions/updates/removals) that
// have to be applied to a repo's index.
type repoChanges struct {
	// Updates lists the files that were added or modified.
	Updates []fileUpdate
	// RemovedFilenames lists the paths of files that no longer exist.
	RemovedFilenames []string
}
2021-12-10 04:27:50 +03:00
func getDefaultBranchSha ( repo * repo_model . Repository ) ( string , error ) {
2019-12-23 15:31:16 +03:00
stdout , err := git . NewCommand ( "show-ref" , "-s" , git . BranchPrefix + repo . DefaultBranch ) . RunInDir ( repo . RepoPath ( ) )
if err != nil {
return "" , err
}
return strings . TrimSpace ( stdout ) , nil
}
// getRepoChanges returns changes to repo since last indexer update
2021-12-10 04:27:50 +03:00
func getRepoChanges ( repo * repo_model . Repository , revision string ) ( * repoChanges , error ) {
status , err := repo_model . GetIndexerStatus ( repo , repo_model . RepoIndexerTypeCode )
2020-02-11 12:34:17 +03:00
if err != nil {
2019-12-23 15:31:16 +03:00
return nil , err
}
2020-02-11 12:34:17 +03:00
if len ( status . CommitSha ) == 0 {
2019-12-23 15:31:16 +03:00
return genesisChanges ( repo , revision )
}
return nonGenesisChanges ( repo , revision )
}
// isIndexable reports whether a tree entry should be handed to the code
// indexer. Only regular or executable entries qualify. The lower-cased entry
// name must not match any configured exclude pattern and, when include
// patterns are configured, must match at least one of them.
func isIndexable(entry *git.TreeEntry) bool {
	if !(entry.IsRegular() || entry.IsExecutable()) {
		return false
	}

	lowered := strings.ToLower(entry.Name())

	// Exclusions always win over inclusions.
	for _, exclude := range setting.Indexer.ExcludePatterns {
		if exclude.Match(lowered) {
			return false
		}
	}

	// Without include patterns everything that was not excluded is indexed.
	includes := setting.Indexer.IncludePatterns
	if len(includes) == 0 {
		return true
	}
	for _, include := range includes {
		if include.Match(lowered) {
			return true
		}
	}
	return false
}
// parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command
func parseGitLsTreeOutput ( stdout [ ] byte ) ( [ ] fileUpdate , error ) {
entries , err := git . ParseTreeEntries ( stdout )
if err != nil {
return nil , err
}
var idxCount = 0
updates := make ( [ ] fileUpdate , len ( entries ) )
for _ , entry := range entries {
if isIndexable ( entry ) {
updates [ idxCount ] = fileUpdate {
Filename : entry . Name ( ) ,
BlobSha : entry . ID . String ( ) ,
2021-02-18 00:32:25 +03:00
Size : entry . Size ( ) ,
Sized : true ,
2019-12-23 15:31:16 +03:00
}
idxCount ++
}
}
return updates [ : idxCount ] , nil
}
// genesisChanges get changes to add repo to the indexer for the first time
2021-12-10 04:27:50 +03:00
func genesisChanges ( repo * repo_model . Repository , revision string ) ( * repoChanges , error ) {
2019-12-23 15:31:16 +03:00
var changes repoChanges
2021-02-18 00:32:25 +03:00
stdout , err := git . NewCommand ( "ls-tree" , "--full-tree" , "-l" , "-r" , revision ) .
2019-12-23 15:31:16 +03:00
RunInDirBytes ( repo . RepoPath ( ) )
if err != nil {
return nil , err
}
changes . Updates , err = parseGitLsTreeOutput ( stdout )
return & changes , err
}
// nonGenesisChanges get changes since the previous indexer update
2021-12-10 04:27:50 +03:00
func nonGenesisChanges ( repo * repo_model . Repository , revision string ) ( * repoChanges , error ) {
2019-12-23 15:31:16 +03:00
diffCmd := git . NewCommand ( "diff" , "--name-status" ,
2020-02-11 12:34:17 +03:00
repo . CodeIndexerStatus . CommitSha , revision )
2019-12-23 15:31:16 +03:00
stdout , err := diffCmd . RunInDir ( repo . RepoPath ( ) )
if err != nil {
// previous commit sha may have been removed by a force push, so
// try rebuilding from scratch
log . Warn ( "git diff: %v" , err )
if err = indexer . Delete ( repo . ID ) ; err != nil {
return nil , err
}
return genesisChanges ( repo , revision )
}
var changes repoChanges
updatedFilenames := make ( [ ] string , 0 , 10 )
for _ , line := range strings . Split ( stdout , "\n" ) {
line = strings . TrimSpace ( line )
if len ( line ) == 0 {
continue
}
2020-01-25 02:26:49 +03:00
fields := strings . Split ( line , "\t" )
if len ( fields ) < 2 {
log . Warn ( "Unparseable output for diff --name-status: `%s`)" , line )
continue
}
filename := fields [ 1 ]
2019-12-23 15:31:16 +03:00
if len ( filename ) == 0 {
continue
} else if filename [ 0 ] == '"' {
filename , err = strconv . Unquote ( filename )
if err != nil {
return nil , err
}
}
2020-01-25 02:26:49 +03:00
switch status := fields [ 0 ] [ 0 ] ; status {
2019-12-23 15:31:16 +03:00
case 'M' , 'A' :
updatedFilenames = append ( updatedFilenames , filename )
case 'D' :
changes . RemovedFilenames = append ( changes . RemovedFilenames , filename )
2020-01-25 02:26:49 +03:00
case 'R' , 'C' :
if len ( fields ) < 3 {
log . Warn ( "Unparseable output for diff --name-status: `%s`)" , line )
continue
}
dest := fields [ 2 ]
if len ( dest ) == 0 {
log . Warn ( "Unparseable output for diff --name-status: `%s`)" , line )
continue
}
if dest [ 0 ] == '"' {
dest , err = strconv . Unquote ( dest )
if err != nil {
return nil , err
}
}
if status == 'R' {
changes . RemovedFilenames = append ( changes . RemovedFilenames , filename )
}
updatedFilenames = append ( updatedFilenames , dest )
2019-12-23 15:31:16 +03:00
default :
log . Warn ( "Unrecognized status: %c (line=%s)" , status , line )
}
}
2021-02-18 00:32:25 +03:00
cmd := git . NewCommand ( "ls-tree" , "--full-tree" , "-l" , revision , "--" )
2019-12-23 15:31:16 +03:00
cmd . AddArguments ( updatedFilenames ... )
lsTreeStdout , err := cmd . RunInDirBytes ( repo . RepoPath ( ) )
if err != nil {
return nil , err
}
changes . Updates , err = parseGitLsTreeOutput ( lsTreeStdout )
return & changes , err
}