2020-12-17 17:00:47 +03:00
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
2021-08-24 19:47:09 +03:00
//go:build gogit
2020-12-17 17:00:47 +03:00
package git
import (
2021-06-07 02:44:58 +03:00
"context"
2020-12-17 17:00:47 +03:00
"path"
"github.com/emirpasic/gods/trees/binaryheap"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"
cgobject "github.com/go-git/go-git/v5/plumbing/object/commitgraph"
)
// GetCommitsInfo gets information of all commits that are corresponding to these entries
2022-07-25 18:39:42 +03:00
func ( tes Entries ) GetCommitsInfo ( ctx context . Context , commit * Commit , treePath string ) ( [ ] CommitInfo , * Commit , error ) {
2020-12-17 17:00:47 +03:00
entryPaths := make ( [ ] string , len ( tes ) + 1 )
// Get the commit for the treePath itself
entryPaths [ 0 ] = ""
for i , entry := range tes {
entryPaths [ i + 1 ] = entry . Name ( )
}
commitNodeIndex , commitGraphFile := commit . repo . CommitNodeIndex ( )
if commitGraphFile != nil {
defer commitGraphFile . Close ( )
}
c , err := commitNodeIndex . Get ( commit . ID )
if err != nil {
return nil , nil , err
}
2022-07-25 18:39:42 +03:00
var revs map [ string ] * Commit
if commit . repo . LastCommitCache != nil {
2020-12-17 17:00:47 +03:00
var unHitPaths [ ] string
2022-07-25 18:39:42 +03:00
revs , unHitPaths , err = getLastCommitForPathsByCache ( commit . ID . String ( ) , treePath , entryPaths , commit . repo . LastCommitCache )
2020-12-17 17:00:47 +03:00
if err != nil {
return nil , nil , err
}
if len ( unHitPaths ) > 0 {
2022-07-25 18:39:42 +03:00
revs2 , err := GetLastCommitForPaths ( ctx , commit . repo . LastCommitCache , c , treePath , unHitPaths )
2020-12-17 17:00:47 +03:00
if err != nil {
return nil , nil , err
}
for k , v := range revs2 {
revs [ k ] = v
}
}
} else {
2021-10-08 16:08:22 +03:00
revs , err = GetLastCommitForPaths ( ctx , nil , c , treePath , entryPaths )
2020-12-17 17:00:47 +03:00
}
if err != nil {
return nil , nil , err
}
commit . repo . gogitStorage . Close ( )
commitsInfo := make ( [ ] CommitInfo , len ( tes ) )
for i , entry := range tes {
commitsInfo [ i ] = CommitInfo {
Entry : entry ,
}
2021-10-08 16:08:22 +03:00
// Check if we have found a commit for this entry in time
2022-07-25 18:39:42 +03:00
if entryCommit , ok := revs [ entry . Name ( ) ] ; ok {
2020-12-17 17:00:47 +03:00
commitsInfo [ i ] . Commit = entryCommit
2021-10-08 16:08:22 +03:00
}
// If the entry if a submodule add a submodule file for this
if entry . IsSubModule ( ) {
subModuleURL := ""
var fullPath string
if len ( treePath ) > 0 {
fullPath = treePath + "/" + entry . Name ( )
} else {
fullPath = entry . Name ( )
}
if subModule , err := commit . GetSubModule ( fullPath ) ; err != nil {
return nil , nil , err
} else if subModule != nil {
subModuleURL = subModule . URL
2020-12-17 17:00:47 +03:00
}
2021-10-08 16:08:22 +03:00
subModuleFile := NewSubModuleFile ( commitsInfo [ i ] . Commit , subModuleURL , entry . ID . String ( ) )
commitsInfo [ i ] . SubModuleFile = subModuleFile
2020-12-17 17:00:47 +03:00
}
}
// Retrieve the commit for the treePath itself (see above). We basically
// get it for free during the tree traversal and it's used for listing
// pages to display information about newest commit for a given path.
var treeCommit * Commit
2022-07-25 18:39:42 +03:00
var ok bool
2020-12-17 17:00:47 +03:00
if treePath == "" {
treeCommit = commit
2022-07-25 18:39:42 +03:00
} else if treeCommit , ok = revs [ "" ] ; ok {
2020-12-17 17:00:47 +03:00
treeCommit . repo = commit . repo
}
return commitsInfo , treeCommit , nil
}
// commitAndPaths is one work item of the history traversal performed by
// GetLastCommitForPaths: a commit node plus the paths still being followed
// through it. Values are built with positional literals, so the field order
// must not change.
type commitAndPaths struct {
commit cgobject . CommitNode
// Paths that are still on the branch represented by commit
paths [ ] string
// Hash of each path as found in commit's tree, keyed by path; paths that
// could not be found in that tree have no key
hashes map [ string ] plumbing . Hash
}
// getCommitTree returns the tree of commit node c, narrowed to the subtree at
// treePath when treePath is not empty. Any error from loading either tree is
// returned unchanged together with a nil tree.
func getCommitTree(c cgobject.CommitNode, treePath string) (*object.Tree, error) {
	root, err := c.Tree()
	if err != nil {
		return nil, err
	}
	if treePath == "" {
		return root, nil
	}

	// Optimize deep traversals by focusing only on the specific tree
	subTree, err := root.Tree(treePath)
	if err != nil {
		return nil, err
	}
	return subTree, nil
}
// getFileHashes maps each of paths (relative to treePath) to its hash in the
// tree of commit node c. The empty path maps to the hash of the tree at
// treePath itself. Paths that cannot be found are left out of the map, and an
// empty map is returned when the tree at treePath does not exist in c.
func getFileHashes(c cgobject.CommitNode, treePath string, paths []string) (map[string]plumbing.Hash, error) {
	hashes := make(map[string]plumbing.Hash)

	tree, err := getCommitTree(c, treePath)
	switch {
	case err == object.ErrDirectoryNotFound:
		// The whole tree didn't exist, so there is nothing to record
		return hashes, nil
	case err != nil:
		return nil, err
	}

	for _, p := range paths {
		if p == "" {
			hashes[p] = tree.Hash
			continue
		}
		// A lookup failure just means the path is absent from this tree.
		if entry, findErr := tree.FindEntry(p); findErr == nil {
			hashes[p] = entry.Hash
		}
	}
	return hashes, nil
}
2022-07-25 18:39:42 +03:00
func getLastCommitForPathsByCache ( commitID , treePath string , paths [ ] string , cache * LastCommitCache ) ( map [ string ] * Commit , [ ] string , error ) {
2020-12-17 17:00:47 +03:00
var unHitEntryPaths [ ] string
2022-07-25 18:39:42 +03:00
results := make ( map [ string ] * Commit )
2020-12-17 17:00:47 +03:00
for _ , p := range paths {
lastCommit , err := cache . Get ( commitID , path . Join ( treePath , p ) )
if err != nil {
return nil , nil , err
}
if lastCommit != nil {
2022-07-25 18:39:42 +03:00
results [ p ] = lastCommit
2020-12-17 17:00:47 +03:00
continue
}
unHitEntryPaths = append ( unHitEntryPaths , p )
}
return results , unHitEntryPaths , nil
}
// GetLastCommitForPaths returns last commit information
2022-07-25 18:39:42 +03:00
func GetLastCommitForPaths ( ctx context . Context , cache * LastCommitCache , c cgobject . CommitNode , treePath string , paths [ ] string ) ( map [ string ] * Commit , error ) {
2021-10-08 16:08:22 +03:00
refSha := c . ID ( ) . String ( )
2020-12-17 17:00:47 +03:00
// We do a tree traversal with nodes sorted by commit time
heap := binaryheap . NewWith ( func ( a , b interface { } ) int {
if a . ( * commitAndPaths ) . commit . CommitTime ( ) . Before ( b . ( * commitAndPaths ) . commit . CommitTime ( ) ) {
return 1
}
return - 1
} )
resultNodes := make ( map [ string ] cgobject . CommitNode )
initialHashes , err := getFileHashes ( c , treePath , paths )
if err != nil {
return nil , err
}
// Start search from the root commit and with full set of paths
heap . Push ( & commitAndPaths { c , paths , initialHashes } )
2021-10-08 16:08:22 +03:00
heaploop :
2020-12-17 17:00:47 +03:00
for {
2021-06-07 02:44:58 +03:00
select {
case <- ctx . Done ( ) :
2021-10-08 16:08:22 +03:00
if ctx . Err ( ) == context . DeadlineExceeded {
break heaploop
}
2021-06-07 02:44:58 +03:00
return nil , ctx . Err ( )
default :
}
2020-12-17 17:00:47 +03:00
cIn , ok := heap . Pop ( )
if ! ok {
break
}
current := cIn . ( * commitAndPaths )
// Load the parent commits for the one we are currently examining
numParents := current . commit . NumParents ( )
var parents [ ] cgobject . CommitNode
for i := 0 ; i < numParents ; i ++ {
parent , err := current . commit . ParentNode ( i )
if err != nil {
break
}
parents = append ( parents , parent )
}
// Examine the current commit and set of interesting paths
pathUnchanged := make ( [ ] bool , len ( current . paths ) )
parentHashes := make ( [ ] map [ string ] plumbing . Hash , len ( parents ) )
for j , parent := range parents {
parentHashes [ j ] , err = getFileHashes ( parent , treePath , current . paths )
if err != nil {
break
}
for i , path := range current . paths {
if parentHashes [ j ] [ path ] == current . hashes [ path ] {
pathUnchanged [ i ] = true
}
}
}
var remainingPaths [ ] string
2021-10-08 16:08:22 +03:00
for i , pth := range current . paths {
2020-12-17 17:00:47 +03:00
// The results could already contain some newer change for the same path,
// so don't override that and bail out on the file early.
2021-10-08 16:08:22 +03:00
if resultNodes [ pth ] == nil {
2020-12-17 17:00:47 +03:00
if pathUnchanged [ i ] {
// The path existed with the same hash in at least one parent so it could
// not have been changed in this commit directly.
2021-10-08 16:08:22 +03:00
remainingPaths = append ( remainingPaths , pth )
2020-12-17 17:00:47 +03:00
} else {
// There are few possible cases how can we get here:
// - The path didn't exist in any parent, so it must have been created by
// this commit.
// - The path did exist in the parent commit, but the hash of the file has
// changed.
// - We are looking at a merge commit and the hash of the file doesn't
// match any of the hashes being merged. This is more common for directories,
// but it can also happen if a file is changed through conflict resolution.
2021-10-08 16:08:22 +03:00
resultNodes [ pth ] = current . commit
if err := cache . Put ( refSha , path . Join ( treePath , pth ) , current . commit . ID ( ) . String ( ) ) ; err != nil {
return nil , err
}
2020-12-17 17:00:47 +03:00
}
}
}
if len ( remainingPaths ) > 0 {
// Add the parent nodes along with remaining paths to the heap for further
// processing.
for j , parent := range parents {
// Combine remainingPath with paths available on the parent branch
// and make union of them
remainingPathsForParent := make ( [ ] string , 0 , len ( remainingPaths ) )
newRemainingPaths := make ( [ ] string , 0 , len ( remainingPaths ) )
for _ , path := range remainingPaths {
if parentHashes [ j ] [ path ] == current . hashes [ path ] {
remainingPathsForParent = append ( remainingPathsForParent , path )
} else {
newRemainingPaths = append ( newRemainingPaths , path )
}
}
if remainingPathsForParent != nil {
heap . Push ( & commitAndPaths { parent , remainingPathsForParent , parentHashes [ j ] } )
}
if len ( newRemainingPaths ) == 0 {
break
} else {
remainingPaths = newRemainingPaths
}
}
}
}
// Post-processing
2022-07-25 18:39:42 +03:00
result := make ( map [ string ] * Commit )
2020-12-17 17:00:47 +03:00
for path , commitNode := range resultNodes {
2022-07-25 18:39:42 +03:00
commit , err := commitNode . Commit ( )
2020-12-17 17:00:47 +03:00
if err != nil {
return nil , err
}
2022-07-25 18:39:42 +03:00
result [ path ] = convertCommit ( commit )
2020-12-17 17:00:47 +03:00
}
return result , nil
}