2020-12-17 17:00:47 +03:00
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package git
import (
"bufio"
"bytes"
2021-07-20 20:23:01 +03:00
"context"
2021-09-18 03:54:15 +03:00
"fmt"
2021-03-04 05:57:01 +03:00
"io"
2020-12-17 17:00:47 +03:00
"math"
2021-09-18 03:54:15 +03:00
"runtime"
2020-12-17 17:00:47 +03:00
"strconv"
2021-03-04 05:57:01 +03:00
"strings"
2021-06-21 01:00:46 +03:00
2021-06-25 19:54:08 +03:00
"code.gitea.io/gitea/modules/log"
2021-06-21 01:00:46 +03:00
"github.com/djherbis/buffer"
"github.com/djherbis/nio/v3"
2020-12-17 17:00:47 +03:00
)
2021-05-10 04:27:03 +03:00
// WriteCloserError is an io.WriteCloser extended with a CloseWithError method,
// the error-carrying counterpart of Close.
type WriteCloserError interface {
	io.WriteCloser

	// CloseWithError closes the writer, handing err to the implementation.
	CloseWithError(err error) error
}
// CatFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func CatFileBatchCheck ( repoPath string ) ( WriteCloserError , * bufio . Reader , func ( ) ) {
batchStdinReader , batchStdinWriter := io . Pipe ( )
batchStdoutReader , batchStdoutWriter := io . Pipe ( )
2021-07-20 20:23:01 +03:00
ctx , ctxCancel := context . WithCancel ( DefaultContext )
closed := make ( chan struct { } )
2021-05-10 04:27:03 +03:00
cancel := func ( ) {
_ = batchStdinReader . Close ( )
_ = batchStdinWriter . Close ( )
_ = batchStdoutReader . Close ( )
_ = batchStdoutWriter . Close ( )
2021-07-20 20:23:01 +03:00
ctxCancel ( )
<- closed
2021-05-10 04:27:03 +03:00
}
2021-09-18 03:54:15 +03:00
_ , filename , line , _ := runtime . Caller ( 2 )
filename = strings . TrimPrefix ( filename , callerPrefix )
2021-05-10 04:27:03 +03:00
go func ( ) {
stderr := strings . Builder { }
2021-09-18 03:54:15 +03:00
err := NewCommandContext ( ctx , "cat-file" , "--batch-check" ) .
SetDescription ( fmt . Sprintf ( "%s cat-file --batch-check [repo_path: %s] (%s:%d)" , GitExecutable , repoPath , filename , line ) ) .
RunInDirFullPipeline ( repoPath , batchStdoutWriter , & stderr , batchStdinReader )
2021-05-10 04:27:03 +03:00
if err != nil {
_ = batchStdoutWriter . CloseWithError ( ConcatenateError ( err , ( & stderr ) . String ( ) ) )
_ = batchStdinReader . CloseWithError ( ConcatenateError ( err , ( & stderr ) . String ( ) ) )
} else {
_ = batchStdoutWriter . Close ( )
_ = batchStdinReader . Close ( )
}
2021-07-20 20:23:01 +03:00
close ( closed )
2021-05-10 04:27:03 +03:00
} ( )
2021-06-21 01:00:46 +03:00
// For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check
2021-05-10 04:27:03 +03:00
batchReader := bufio . NewReader ( batchStdoutReader )
return batchStdinWriter , batchReader , cancel
}
2021-03-04 05:57:01 +03:00
// CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
2021-05-10 04:27:03 +03:00
func CatFileBatch ( repoPath string ) ( WriteCloserError , * bufio . Reader , func ( ) ) {
// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
2021-03-04 05:57:01 +03:00
// so let's create a batch stdin and stdout
batchStdinReader , batchStdinWriter := io . Pipe ( )
2021-06-21 01:00:46 +03:00
batchStdoutReader , batchStdoutWriter := nio . Pipe ( buffer . New ( 32 * 1024 ) )
2021-07-20 20:23:01 +03:00
ctx , ctxCancel := context . WithCancel ( DefaultContext )
closed := make ( chan struct { } )
2021-03-04 05:57:01 +03:00
cancel := func ( ) {
_ = batchStdinReader . Close ( )
_ = batchStdinWriter . Close ( )
_ = batchStdoutReader . Close ( )
_ = batchStdoutWriter . Close ( )
2021-07-20 20:23:01 +03:00
ctxCancel ( )
<- closed
2021-03-04 05:57:01 +03:00
}
2021-09-18 03:54:15 +03:00
_ , filename , line , _ := runtime . Caller ( 2 )
filename = strings . TrimPrefix ( filename , callerPrefix )
2021-03-04 05:57:01 +03:00
go func ( ) {
stderr := strings . Builder { }
2021-09-18 03:54:15 +03:00
err := NewCommandContext ( ctx , "cat-file" , "--batch" ) .
SetDescription ( fmt . Sprintf ( "%s cat-file --batch [repo_path: %s] (%s:%d)" , GitExecutable , repoPath , filename , line ) ) .
RunInDirFullPipeline ( repoPath , batchStdoutWriter , & stderr , batchStdinReader )
2021-03-04 05:57:01 +03:00
if err != nil {
_ = batchStdoutWriter . CloseWithError ( ConcatenateError ( err , ( & stderr ) . String ( ) ) )
_ = batchStdinReader . CloseWithError ( ConcatenateError ( err , ( & stderr ) . String ( ) ) )
} else {
_ = batchStdoutWriter . Close ( )
_ = batchStdinReader . Close ( )
}
2021-07-20 20:23:01 +03:00
close ( closed )
2021-03-04 05:57:01 +03:00
} ( )
// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
2021-06-21 01:00:46 +03:00
batchReader := bufio . NewReaderSize ( batchStdoutReader , 32 * 1024 )
2021-03-04 05:57:01 +03:00
return batchStdinWriter , batchReader , cancel
}
2020-12-17 17:00:47 +03:00
// ReadBatchLine reads the header line from cat-file --batch
// We expect:
// <sha> SP <type> SP <size> LF
2021-05-10 04:27:03 +03:00
// sha is a 40byte not 20byte here
2020-12-17 17:00:47 +03:00
func ReadBatchLine ( rd * bufio . Reader ) ( sha [ ] byte , typ string , size int64 , err error ) {
2021-06-21 01:00:46 +03:00
typ , err = rd . ReadString ( '\n' )
2020-12-17 17:00:47 +03:00
if err != nil {
return
}
2021-06-21 01:00:46 +03:00
if len ( typ ) == 1 {
typ , err = rd . ReadString ( '\n' )
if err != nil {
return
}
}
idx := strings . IndexByte ( typ , ' ' )
if idx < 0 {
2021-06-25 19:54:08 +03:00
log . Debug ( "missing space typ: %s" , typ )
2021-06-21 01:00:46 +03:00
err = ErrNotExist { ID : string ( sha ) }
2020-12-17 17:00:47 +03:00
return
}
2021-06-21 01:00:46 +03:00
sha = [ ] byte ( typ [ : idx ] )
typ = typ [ idx + 1 : ]
2020-12-17 17:00:47 +03:00
2021-06-21 01:00:46 +03:00
idx = strings . IndexByte ( typ , ' ' )
2021-05-10 04:27:03 +03:00
if idx < 0 {
err = ErrNotExist { ID : string ( sha ) }
2020-12-17 17:00:47 +03:00
return
}
2021-06-21 01:00:46 +03:00
2021-05-10 04:27:03 +03:00
sizeStr := typ [ idx + 1 : len ( typ ) - 1 ]
typ = typ [ : idx ]
2020-12-17 17:00:47 +03:00
2021-05-10 04:27:03 +03:00
size , err = strconv . ParseInt ( sizeStr , 10 , 64 )
2020-12-17 17:00:47 +03:00
return
}
// ReadTagObjectID extracts the ID found on the "object" line of a tag read
// from a cat-file --batch stream and then throws away the remainder of that
// tag, including the one extra byte that follows its size bytes.
//
// size is the object's size as announced by the batch header line. A read
// error while searching for the "object" line returns an empty ID; an error
// while discarding returns the ID that was already found together with the error.
func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) {
	var (
		objectID string
		consumed int64
	)

	for {
		line, err := rd.ReadBytes('\n')
		if err != nil {
			return "", err
		}
		consumed += int64(len(line))

		sp := bytes.IndexByte(line, ' ')
		if sp >= 0 && string(line[:sp]) == "object" {
			// Value runs from after the space up to (excluding) the LF.
			objectID = string(line[sp+1 : len(line)-1])
			break
		}
	}

	// Discard the rest of the tag; Discard takes an int, so go in MaxInt32 steps.
	remaining := size - consumed + 1
	for ; remaining > math.MaxInt32; remaining -= math.MaxInt32 {
		if _, err := rd.Discard(math.MaxInt32); err != nil {
			return objectID, err
		}
	}
	_, err := rd.Discard(int(remaining))
	return objectID, err
}
// ReadTreeID extracts the ID found on the "tree" line of a commit read from a
// cat-file --batch stream and then throws away the remainder of that commit,
// including the one extra byte that follows its size bytes.
//
// size is the object's size as announced by the batch header line. A read
// error while searching for the "tree" line returns an empty ID; an error
// while discarding returns the ID that was already found together with the error.
func ReadTreeID(rd *bufio.Reader, size int64) (string, error) {
	treeID := ""
	read := int64(0)

	for found := false; !found; {
		line, err := rd.ReadBytes('\n')
		if err != nil {
			return "", err
		}
		read += int64(len(line))

		space := bytes.IndexByte(line, ' ')
		if space < 0 || string(line[:space]) != "tree" {
			continue
		}
		// Value runs from after the space up to (excluding) the LF.
		treeID = string(line[space+1 : len(line)-1])
		found = true
	}

	// Discard the rest of the commit; Discard takes an int, so go in MaxInt32 steps.
	left := size - read + 1
	for left > math.MaxInt32 {
		if _, err := rd.Discard(math.MaxInt32); err != nil {
			return treeID, err
		}
		left -= math.MaxInt32
	}
	_, err := rd.Discard(int(left))
	return treeID, err
}
// git tree files are a list of entries of the form:
//
//	<mode-in-ascii> SP <fname> NUL <20-byte SHA>
//
// Unfortunately this 20-byte notation is somewhat in conflict with all other
// git tools, so a way to convert these 20-byte SHAs into 40-byte SHAs is needed.

// hextable maps a nibble value (0-15) to its lower-case hex digit; it is used
// to convert quickly from 20-byte to 40-byte hashes.
const hextable = "0123456789abcdef"

// To40ByteSHA writes the 40-character hex form of the 20-byte sha into out and
// returns out. sha and out may be the same 40-byte slice: bytes are expanded
// from the last one to the first, so an input byte is always read before its
// position can be overwritten, allowing in-place conversion without allocations.
// (The original author notes this as at least 100x quicker than hex.EncodeToString.)
// NB: sha must hold at least 20 bytes and out at least 40.
func To40ByteSHA(sha, out []byte) []byte {
	for pos := 19; pos >= 0; pos-- {
		b := sha[pos]
		hi := hextable[b>>4]
		lo := hextable[b&0x0f]
		out[pos*2], out[pos*2+1] = hi, lo
	}
	return out
}
2021-06-21 01:00:46 +03:00
// ParseTreeLine reads an entry from a tree in a cat-file --batch stream
// This carefully avoids allocations - except where fnameBuf is too small.
2020-12-17 17:00:47 +03:00
// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
//
// Each line is composed of:
// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
//
// We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
2021-06-21 01:00:46 +03:00
func ParseTreeLine ( rd * bufio . Reader , modeBuf , fnameBuf , shaBuf [ ] byte ) ( mode , fname , sha [ ] byte , n int , err error ) {
2020-12-17 17:00:47 +03:00
var readBytes [ ] byte
2021-06-21 01:00:46 +03:00
// Read the Mode & fname
2020-12-17 17:00:47 +03:00
readBytes , err = rd . ReadSlice ( '\x00' )
if err != nil {
return
}
2021-06-21 01:00:46 +03:00
idx := bytes . IndexByte ( readBytes , ' ' )
if idx < 0 {
2021-06-25 19:54:08 +03:00
log . Debug ( "missing space in readBytes ParseTreeLine: %s" , readBytes )
2020-12-17 17:00:47 +03:00
2021-06-21 01:00:46 +03:00
err = & ErrNotExist { }
2020-12-17 17:00:47 +03:00
return
}
2021-06-21 01:00:46 +03:00
n += idx + 1
copy ( modeBuf , readBytes [ : idx ] )
if len ( modeBuf ) >= idx {
modeBuf = modeBuf [ : idx ]
} else {
modeBuf = append ( modeBuf , readBytes [ len ( modeBuf ) : idx ] ... )
2020-12-17 17:00:47 +03:00
}
2021-06-21 01:00:46 +03:00
mode = modeBuf
readBytes = readBytes [ idx + 1 : ]
2020-12-17 17:00:47 +03:00
// Deal with the fname
copy ( fnameBuf , readBytes )
if len ( fnameBuf ) > len ( readBytes ) {
fnameBuf = fnameBuf [ : len ( readBytes ) ]
} else {
fnameBuf = append ( fnameBuf , readBytes [ len ( fnameBuf ) : ] ... )
}
for err == bufio . ErrBufferFull {
readBytes , err = rd . ReadSlice ( '\x00' )
fnameBuf = append ( fnameBuf , readBytes ... )
}
n += len ( fnameBuf )
if err != nil {
return
}
fnameBuf = fnameBuf [ : len ( fnameBuf ) - 1 ]
fname = fnameBuf
// Deal with the 20-byte SHA
2021-06-21 01:00:46 +03:00
idx = 0
2020-12-17 17:00:47 +03:00
for idx < 20 {
read := 0
read , err = rd . Read ( shaBuf [ idx : 20 ] )
n += read
if err != nil {
return
}
idx += read
}
sha = shaBuf
return
}
2021-09-18 03:54:15 +03:00
// callerPrefix is the path of this source file with its trailing
// "modules/git/batch_reader.go" removed. It is set once in init.
var callerPrefix string

// init derives callerPrefix from the file name runtime.Caller reports for this file.
func init() {
	const relativePath = "modules/git/batch_reader.go"

	_, thisFile, _, _ := runtime.Caller(0)
	callerPrefix = strings.TrimSuffix(thisFile, relativePath)
}