// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

//go:build !gogit

package pipeline
import (
"bufio"
"bytes"
"fmt"
"io"
"sort"
"strings"
"sync"
"time"
"code.gitea.io/gitea/modules/git"
)
// LFSResult represents commits found using a provided pointer file hash
type LFSResult struct {
Name string
SHA string
Summary string
When time . Time
2023-12-14 00:02:00 +03:00
ParentIDs [ ] git . ObjectID
2020-12-17 17:00:47 +03:00
BranchName string
FullCommitName string
}
// lfsResultSlice provides the Len, Swap and Less methods needed to pass a
// slice of results to sort.Sort, ordering them from the earliest When to the
// latest.
type lfsResultSlice []*LFSResult

// Len reports how many results the slice holds.
func (s lfsResultSlice) Len() int {
	return len(s)
}

// Swap exchanges the results stored at positions i and j.
func (s lfsResultSlice) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}

// Less reports whether the result at i is strictly earlier than the one at j.
func (s lfsResultSlice) Less(i, j int) bool {
	return s[i].When.Before(s[j].When)
}
// FindLFSFile finds commits that contain a provided pointer file hash
2023-12-14 00:02:00 +03:00
func FindLFSFile ( repo * git . Repository , objectID git . ObjectID ) ( [ ] * LFSResult , error ) {
2020-12-17 17:00:47 +03:00
resultsMap := map [ string ] * LFSResult { }
results := make ( [ ] * LFSResult , 0 )
basePath := repo . Path
// Use rev-list to provide us with all commits in order
revListReader , revListWriter := io . Pipe ( )
defer func ( ) {
_ = revListWriter . Close ( )
_ = revListReader . Close ( )
} ( )
go func ( ) {
stderr := strings . Builder { }
2022-04-01 05:55:30 +03:00
err := git . NewCommand ( repo . Ctx , "rev-list" , "--all" ) . Run ( & git . RunOpts {
Dir : repo . Path ,
Stdout : revListWriter ,
Stderr : & stderr ,
2022-02-11 15:47:22 +03:00
} )
2020-12-17 17:00:47 +03:00
if err != nil {
_ = revListWriter . CloseWithError ( git . ConcatenateError ( err , ( & stderr ) . String ( ) ) )
} else {
_ = revListWriter . Close ( )
}
} ( )
// Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// so let's create a batch stdin and stdout
2021-11-30 23:06:32 +03:00
batchStdinWriter , batchReader , cancel := repo . CatFileBatch ( repo . Ctx )
2021-03-04 05:57:01 +03:00
defer cancel ( )
2020-12-17 17:00:47 +03:00
// We'll use a scanner for the revList because it's simpler than a bufio.Reader
scan := bufio . NewScanner ( revListReader )
trees := [ ] [ ] byte { }
paths := [ ] string { }
fnameBuf := make ( [ ] byte , 4096 )
modeBuf := make ( [ ] byte , 40 )
2023-12-14 00:02:00 +03:00
workingShaBuf := make ( [ ] byte , objectID . Type ( ) . FullLength ( ) / 2 )
2020-12-17 17:00:47 +03:00
for scan . Scan ( ) {
// Get the next commit ID
commitID := scan . Bytes ( )
// push the commit to the cat-file --batch process
_ , err := batchStdinWriter . Write ( commitID )
if err != nil {
return nil , err
}
_ , err = batchStdinWriter . Write ( [ ] byte { '\n' } )
if err != nil {
return nil , err
}
var curCommit * git . Commit
curPath := ""
commitReadingLoop :
for {
_ , typ , size , err := git . ReadBatchLine ( batchReader )
if err != nil {
return nil , err
}
switch typ {
case "tag" :
// This shouldn't happen but if it does well just get the commit and try again
id , err := git . ReadTagObjectID ( batchReader , size )
if err != nil {
return nil , err
}
_ , err = batchStdinWriter . Write ( [ ] byte ( id + "\n" ) )
if err != nil {
return nil , err
}
continue
case "commit" :
// Read in the commit to get its tree and in case this is one of the last used commits
2023-12-19 10:20:47 +03:00
curCommit , err = git . CommitFromReader ( repo , git . MustIDFromString ( string ( commitID ) ) , io . LimitReader ( batchReader , size ) )
2020-12-17 17:00:47 +03:00
if err != nil {
return nil , err
}
2021-06-21 01:00:46 +03:00
if _ , err := batchReader . Discard ( 1 ) ; err != nil {
return nil , err
}
2020-12-17 17:00:47 +03:00
2023-12-14 00:02:00 +03:00
if _ , err := batchStdinWriter . Write ( [ ] byte ( curCommit . Tree . ID . String ( ) + "\n" ) ) ; err != nil {
2020-12-17 17:00:47 +03:00
return nil , err
}
curPath = ""
case "tree" :
var n int64
for n < size {
2023-12-14 00:02:00 +03:00
mode , fname , binObjectID , count , err := git . ParseTreeLine ( objectID . Type ( ) , batchReader , modeBuf , fnameBuf , workingShaBuf )
2020-12-17 17:00:47 +03:00
if err != nil {
return nil , err
}
n += int64 ( count )
2023-12-14 00:02:00 +03:00
if bytes . Equal ( binObjectID , objectID . RawValue ( ) ) {
2020-12-17 17:00:47 +03:00
result := LFSResult {
2023-12-14 00:02:00 +03:00
Name : curPath + string ( fname ) ,
SHA : curCommit . ID . String ( ) ,
Summary : strings . Split ( strings . TrimSpace ( curCommit . CommitMessage ) , "\n" ) [ 0 ] ,
When : curCommit . Author . When ,
ParentIDs : curCommit . Parents ,
2020-12-17 17:00:47 +03:00
}
resultsMap [ curCommit . ID . String ( ) + ":" + curPath + string ( fname ) ] = & result
} else if string ( mode ) == git . EntryModeTree . String ( ) {
2023-12-14 00:02:00 +03:00
hexObjectID := make ( [ ] byte , objectID . Type ( ) . FullLength ( ) )
git . BinToHex ( objectID . Type ( ) , binObjectID , hexObjectID )
trees = append ( trees , hexObjectID )
2020-12-17 17:00:47 +03:00
paths = append ( paths , curPath + string ( fname ) + "/" )
}
2021-06-21 01:00:46 +03:00
}
if _ , err := batchReader . Discard ( 1 ) ; err != nil {
return nil , err
2020-12-17 17:00:47 +03:00
}
if len ( trees ) > 0 {
_ , err := batchStdinWriter . Write ( trees [ len ( trees ) - 1 ] )
if err != nil {
return nil , err
}
_ , err = batchStdinWriter . Write ( [ ] byte ( "\n" ) )
if err != nil {
return nil , err
}
curPath = paths [ len ( paths ) - 1 ]
trees = trees [ : len ( trees ) - 1 ]
paths = paths [ : len ( paths ) - 1 ]
} else {
break commitReadingLoop
}
}
}
}
if err := scan . Err ( ) ; err != nil {
return nil , err
}
for _ , result := range resultsMap {
hasParent := false
2023-12-14 00:02:00 +03:00
for _ , parentID := range result . ParentIDs {
if _ , hasParent = resultsMap [ parentID . String ( ) + ":" + result . Name ] ; hasParent {
2020-12-17 17:00:47 +03:00
break
}
}
if ! hasParent {
results = append ( results , result )
}
}
sort . Sort ( lfsResultSlice ( results ) )
// Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple
shasToNameReader , shasToNameWriter := io . Pipe ( )
nameRevStdinReader , nameRevStdinWriter := io . Pipe ( )
errChan := make ( chan error , 1 )
wg := sync . WaitGroup { }
wg . Add ( 3 )
go func ( ) {
defer wg . Done ( )
scanner := bufio . NewScanner ( nameRevStdinReader )
i := 0
for scanner . Scan ( ) {
line := scanner . Text ( )
if len ( line ) == 0 {
continue
}
result := results [ i ]
result . FullCommitName = line
result . BranchName = strings . Split ( line , "~" ) [ 0 ]
i ++
}
} ( )
2022-01-20 02:26:57 +03:00
go NameRevStdin ( repo . Ctx , shasToNameReader , nameRevStdinWriter , & wg , basePath )
2020-12-17 17:00:47 +03:00
go func ( ) {
defer wg . Done ( )
defer shasToNameWriter . Close ( )
for _ , result := range results {
2021-11-18 12:50:22 +03:00
_ , err := shasToNameWriter . Write ( [ ] byte ( result . SHA ) )
if err != nil {
errChan <- err
break
2020-12-17 17:00:47 +03:00
}
2021-11-18 12:50:22 +03:00
_ , err = shasToNameWriter . Write ( [ ] byte { '\n' } )
if err != nil {
errChan <- err
break
2020-12-17 17:00:47 +03:00
}
}
} ( )
wg . Wait ( )
select {
case err , has := <- errChan :
if has {
return nil , fmt . Errorf ( "Unable to obtain name for LFS files. Error: %w" , err )
}
default :
}
return results , nil
}