2020-12-17 17:00:47 +03:00
// Copyright 2020 The Gitea Authors. All rights reserved.
2022-11-27 21:20:29 +03:00
// SPDX-License-Identifier: MIT
2020-12-17 17:00:47 +03:00
package git
import (
"bufio"
"bytes"
2021-07-20 20:23:01 +03:00
"context"
2021-09-18 03:54:15 +03:00
"fmt"
2021-03-04 05:57:01 +03:00
"io"
2020-12-17 17:00:47 +03:00
"math"
2021-09-18 03:54:15 +03:00
"runtime"
2020-12-17 17:00:47 +03:00
"strconv"
2021-03-04 05:57:01 +03:00
"strings"
2021-06-21 01:00:46 +03:00
2021-06-25 19:54:08 +03:00
"code.gitea.io/gitea/modules/log"
2021-06-21 01:00:46 +03:00
"github.com/djherbis/buffer"
"github.com/djherbis/nio/v3"
2020-12-17 17:00:47 +03:00
)
2021-05-10 04:27:03 +03:00
// WriteCloserError is an io.WriteCloser that can additionally be closed with
// an explicit error. In this file the stdin end handed back by catFileBatch
// and catFileBatchCheck (an *io.PipeWriter) is returned as this interface.
type WriteCloserError interface {
io . WriteCloser
// CloseWithError closes the writer, passing err along.
// NOTE(review): presumably mirrors io.PipeWriter.CloseWithError, i.e. the
// reading side then observes err instead of io.EOF - confirm for other
// implementations.
CloseWithError ( err error ) error
}
2024-08-20 20:04:57 +03:00
// ensureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository.
2021-12-16 22:01:14 +03:00
// Run before opening git cat-file.
// This is needed otherwise the git cat-file will hang for invalid repositories.
2024-08-20 20:04:57 +03:00
func ensureValidGitRepository ( ctx context . Context , repoPath string ) error {
2021-12-16 22:01:14 +03:00
stderr := strings . Builder { }
2022-02-06 22:01:47 +03:00
err := NewCommand ( ctx , "rev-parse" ) .
2021-12-16 22:01:14 +03:00
SetDescription ( fmt . Sprintf ( "%s rev-parse [repo_path: %s]" , GitExecutable , repoPath ) ) .
2022-04-01 05:55:30 +03:00
Run ( & RunOpts {
Dir : repoPath ,
Stderr : & stderr ,
2022-02-11 15:47:22 +03:00
} )
2021-12-16 22:01:14 +03:00
if err != nil {
return ConcatenateError ( err , ( & stderr ) . String ( ) )
}
return nil
}
2024-08-20 20:04:57 +03:00
// catFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func catFileBatchCheck ( ctx context . Context , repoPath string ) ( WriteCloserError , * bufio . Reader , func ( ) ) {
2021-05-10 04:27:03 +03:00
batchStdinReader , batchStdinWriter := io . Pipe ( )
batchStdoutReader , batchStdoutWriter := io . Pipe ( )
2021-11-30 23:06:32 +03:00
ctx , ctxCancel := context . WithCancel ( ctx )
2021-07-20 20:23:01 +03:00
closed := make ( chan struct { } )
2021-05-10 04:27:03 +03:00
cancel := func ( ) {
2021-07-20 20:23:01 +03:00
ctxCancel ( )
2021-11-30 23:06:32 +03:00
_ = batchStdoutReader . Close ( )
_ = batchStdinWriter . Close ( )
2021-07-20 20:23:01 +03:00
<- closed
2021-05-10 04:27:03 +03:00
}
2022-04-22 18:20:04 +03:00
// Ensure cancel is called as soon as the provided context is cancelled
go func ( ) {
<- ctx . Done ( )
cancel ( )
} ( )
2021-09-18 03:54:15 +03:00
_ , filename , line , _ := runtime . Caller ( 2 )
filename = strings . TrimPrefix ( filename , callerPrefix )
2021-05-10 04:27:03 +03:00
go func ( ) {
stderr := strings . Builder { }
2022-02-06 22:01:47 +03:00
err := NewCommand ( ctx , "cat-file" , "--batch-check" ) .
2021-09-18 03:54:15 +03:00
SetDescription ( fmt . Sprintf ( "%s cat-file --batch-check [repo_path: %s] (%s:%d)" , GitExecutable , repoPath , filename , line ) ) .
2022-04-01 05:55:30 +03:00
Run ( & RunOpts {
Dir : repoPath ,
Stdin : batchStdinReader ,
Stdout : batchStdoutWriter ,
Stderr : & stderr ,
2023-08-04 15:50:41 +03:00
UseContextTimeout : true ,
2022-02-11 15:47:22 +03:00
} )
2021-05-10 04:27:03 +03:00
if err != nil {
_ = batchStdoutWriter . CloseWithError ( ConcatenateError ( err , ( & stderr ) . String ( ) ) )
_ = batchStdinReader . CloseWithError ( ConcatenateError ( err , ( & stderr ) . String ( ) ) )
} else {
_ = batchStdoutWriter . Close ( )
_ = batchStdinReader . Close ( )
}
2021-07-20 20:23:01 +03:00
close ( closed )
2021-05-10 04:27:03 +03:00
} ( )
2021-06-21 01:00:46 +03:00
// For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check
2021-05-10 04:27:03 +03:00
batchReader := bufio . NewReader ( batchStdoutReader )
return batchStdinWriter , batchReader , cancel
}
2024-08-20 20:04:57 +03:00
// catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func catFileBatch ( ctx context . Context , repoPath string ) ( WriteCloserError , * bufio . Reader , func ( ) ) {
2021-05-10 04:27:03 +03:00
// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
2021-03-04 05:57:01 +03:00
// so let's create a batch stdin and stdout
batchStdinReader , batchStdinWriter := io . Pipe ( )
2021-06-21 01:00:46 +03:00
batchStdoutReader , batchStdoutWriter := nio . Pipe ( buffer . New ( 32 * 1024 ) )
2021-11-30 23:06:32 +03:00
ctx , ctxCancel := context . WithCancel ( ctx )
2021-07-20 20:23:01 +03:00
closed := make ( chan struct { } )
2021-03-04 05:57:01 +03:00
cancel := func ( ) {
2021-11-30 23:06:32 +03:00
ctxCancel ( )
2021-03-04 05:57:01 +03:00
_ = batchStdinWriter . Close ( )
_ = batchStdoutReader . Close ( )
2021-07-20 20:23:01 +03:00
<- closed
2021-03-04 05:57:01 +03:00
}
2022-04-22 18:20:04 +03:00
// Ensure cancel is called as soon as the provided context is cancelled
go func ( ) {
<- ctx . Done ( )
cancel ( )
} ( )
2021-09-18 03:54:15 +03:00
_ , filename , line , _ := runtime . Caller ( 2 )
filename = strings . TrimPrefix ( filename , callerPrefix )
2021-03-04 05:57:01 +03:00
go func ( ) {
stderr := strings . Builder { }
2022-02-06 22:01:47 +03:00
err := NewCommand ( ctx , "cat-file" , "--batch" ) .
2021-09-18 03:54:15 +03:00
SetDescription ( fmt . Sprintf ( "%s cat-file --batch [repo_path: %s] (%s:%d)" , GitExecutable , repoPath , filename , line ) ) .
2022-04-01 05:55:30 +03:00
Run ( & RunOpts {
Dir : repoPath ,
Stdin : batchStdinReader ,
Stdout : batchStdoutWriter ,
Stderr : & stderr ,
2023-08-04 15:50:41 +03:00
UseContextTimeout : true ,
2022-02-11 15:47:22 +03:00
} )
2021-03-04 05:57:01 +03:00
if err != nil {
_ = batchStdoutWriter . CloseWithError ( ConcatenateError ( err , ( & stderr ) . String ( ) ) )
_ = batchStdinReader . CloseWithError ( ConcatenateError ( err , ( & stderr ) . String ( ) ) )
} else {
_ = batchStdoutWriter . Close ( )
_ = batchStdinReader . Close ( )
}
2021-07-20 20:23:01 +03:00
close ( closed )
2021-03-04 05:57:01 +03:00
} ( )
// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
2021-06-21 01:00:46 +03:00
batchReader := bufio . NewReaderSize ( batchStdoutReader , 32 * 1024 )
2021-03-04 05:57:01 +03:00
return batchStdinWriter , batchReader , cancel
}
2020-12-17 17:00:47 +03:00
// ReadBatchLine reads the header line from cat-file --batch
2024-11-12 05:38:22 +03:00
// We expect: <oid> SP <type> SP <size> LF
// then leaving the rest of the stream "<contents> LF" to be read
2020-12-17 17:00:47 +03:00
func ReadBatchLine ( rd * bufio . Reader ) ( sha [ ] byte , typ string , size int64 , err error ) {
2021-06-21 01:00:46 +03:00
typ , err = rd . ReadString ( '\n' )
2020-12-17 17:00:47 +03:00
if err != nil {
2023-07-09 14:58:06 +03:00
return sha , typ , size , err
2020-12-17 17:00:47 +03:00
}
2021-06-21 01:00:46 +03:00
if len ( typ ) == 1 {
typ , err = rd . ReadString ( '\n' )
if err != nil {
2023-07-09 14:58:06 +03:00
return sha , typ , size , err
2021-06-21 01:00:46 +03:00
}
}
idx := strings . IndexByte ( typ , ' ' )
if idx < 0 {
2021-06-25 19:54:08 +03:00
log . Debug ( "missing space typ: %s" , typ )
2023-07-09 14:58:06 +03:00
return sha , typ , size , ErrNotExist { ID : string ( sha ) }
2020-12-17 17:00:47 +03:00
}
2021-06-21 01:00:46 +03:00
sha = [ ] byte ( typ [ : idx ] )
typ = typ [ idx + 1 : ]
2020-12-17 17:00:47 +03:00
2021-06-21 01:00:46 +03:00
idx = strings . IndexByte ( typ , ' ' )
2021-05-10 04:27:03 +03:00
if idx < 0 {
2023-07-09 14:58:06 +03:00
return sha , typ , size , ErrNotExist { ID : string ( sha ) }
2020-12-17 17:00:47 +03:00
}
2021-06-21 01:00:46 +03:00
2021-05-10 04:27:03 +03:00
sizeStr := typ [ idx + 1 : len ( typ ) - 1 ]
typ = typ [ : idx ]
2020-12-17 17:00:47 +03:00
2021-05-10 04:27:03 +03:00
size , err = strconv . ParseInt ( sizeStr , 10 , 64 )
2022-06-20 13:02:49 +03:00
return sha , typ , size , err
2020-12-17 17:00:47 +03:00
}
// ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream.
func ReadTagObjectID ( rd * bufio . Reader , size int64 ) ( string , error ) {
2022-06-20 13:02:49 +03:00
var id string
2020-12-17 17:00:47 +03:00
var n int64
headerLoop :
for {
line , err := rd . ReadBytes ( '\n' )
if err != nil {
return "" , err
}
n += int64 ( len ( line ) )
idx := bytes . Index ( line , [ ] byte { ' ' } )
if idx < 0 {
continue
}
if string ( line [ : idx ] ) == "object" {
id = string ( line [ idx + 1 : len ( line ) - 1 ] )
break headerLoop
}
}
// Discard the rest of the tag
2024-02-22 06:48:19 +03:00
return id , DiscardFull ( rd , size - n + 1 )
2020-12-17 17:00:47 +03:00
}
// ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream.
func ReadTreeID ( rd * bufio . Reader , size int64 ) ( string , error ) {
2022-06-20 13:02:49 +03:00
var id string
2020-12-17 17:00:47 +03:00
var n int64
headerLoop :
for {
line , err := rd . ReadBytes ( '\n' )
if err != nil {
return "" , err
}
n += int64 ( len ( line ) )
idx := bytes . Index ( line , [ ] byte { ' ' } )
if idx < 0 {
continue
}
if string ( line [ : idx ] ) == "tree" {
id = string ( line [ idx + 1 : len ( line ) - 1 ] )
break headerLoop
}
}
// Discard the rest of the commit
2024-02-22 06:48:19 +03:00
return id , DiscardFull ( rd , size - n + 1 )
2020-12-17 17:00:47 +03:00
}
// Raw git tree objects are a list of entries of the form:
//
//	<mode-in-ascii> SP <fname> NUL <binary hash>
//
// The hash is stored as raw bytes, whereas the rest of the git tooling works
// with hex-encoded hashes, so the binary hashes have to be converted to hex.

// hextable lists the lowercase hexadecimal digits; indexing it with a 4-bit
// value yields the matching hex character.
const hextable = "0123456789abcdef"
2023-12-14 00:02:00 +03:00
// BinToHexHeash converts a binary Hash into a hex encoded one. Input and output can be the
// same byte slice to support in place conversion without allocations.
2021-05-14 16:12:11 +03:00
// This is at least 100x quicker that hex.EncodeToString
2023-12-14 00:02:00 +03:00
func BinToHex ( objectFormat ObjectFormat , sha , out [ ] byte ) [ ] byte {
for i := objectFormat . FullLength ( ) / 2 - 1 ; i >= 0 ; i -- {
2020-12-17 17:00:47 +03:00
v := sha [ i ]
vhi , vlo := v >> 4 , v & 0x0f
shi , slo := hextable [ vhi ] , hextable [ vlo ]
2021-05-14 16:12:11 +03:00
out [ i * 2 ] , out [ i * 2 + 1 ] = shi , slo
2020-12-17 17:00:47 +03:00
}
2021-05-14 16:12:11 +03:00
return out
2020-12-17 17:00:47 +03:00
}
2021-06-21 01:00:46 +03:00
// ParseTreeLine reads an entry from a tree in a cat-file --batch stream
// This carefully avoids allocations - except where fnameBuf is too small.
2020-12-17 17:00:47 +03:00
// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
//
// Each line is composed of:
2023-12-14 00:02:00 +03:00
// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <binary HASH>
2020-12-17 17:00:47 +03:00
//
2023-12-14 00:02:00 +03:00
// We don't attempt to convert the raw HASH to save a lot of time
func ParseTreeLine ( objectFormat ObjectFormat , rd * bufio . Reader , modeBuf , fnameBuf , shaBuf [ ] byte ) ( mode , fname , sha [ ] byte , n int , err error ) {
2020-12-17 17:00:47 +03:00
var readBytes [ ] byte
2021-06-21 01:00:46 +03:00
// Read the Mode & fname
2020-12-17 17:00:47 +03:00
readBytes , err = rd . ReadSlice ( '\x00' )
if err != nil {
2023-07-09 14:58:06 +03:00
return mode , fname , sha , n , err
2020-12-17 17:00:47 +03:00
}
2021-06-21 01:00:46 +03:00
idx := bytes . IndexByte ( readBytes , ' ' )
if idx < 0 {
2021-06-25 19:54:08 +03:00
log . Debug ( "missing space in readBytes ParseTreeLine: %s" , readBytes )
2023-07-09 14:58:06 +03:00
return mode , fname , sha , n , & ErrNotExist { }
2020-12-17 17:00:47 +03:00
}
2021-06-21 01:00:46 +03:00
n += idx + 1
copy ( modeBuf , readBytes [ : idx ] )
if len ( modeBuf ) >= idx {
modeBuf = modeBuf [ : idx ]
} else {
modeBuf = append ( modeBuf , readBytes [ len ( modeBuf ) : idx ] ... )
2020-12-17 17:00:47 +03:00
}
2021-06-21 01:00:46 +03:00
mode = modeBuf
readBytes = readBytes [ idx + 1 : ]
2020-12-17 17:00:47 +03:00
// Deal with the fname
copy ( fnameBuf , readBytes )
if len ( fnameBuf ) > len ( readBytes ) {
fnameBuf = fnameBuf [ : len ( readBytes ) ]
} else {
fnameBuf = append ( fnameBuf , readBytes [ len ( fnameBuf ) : ] ... )
}
for err == bufio . ErrBufferFull {
readBytes , err = rd . ReadSlice ( '\x00' )
fnameBuf = append ( fnameBuf , readBytes ... )
}
n += len ( fnameBuf )
if err != nil {
2023-07-09 14:58:06 +03:00
return mode , fname , sha , n , err
2020-12-17 17:00:47 +03:00
}
fnameBuf = fnameBuf [ : len ( fnameBuf ) - 1 ]
fname = fnameBuf
2023-12-14 00:02:00 +03:00
// Deal with the binary hash
2021-06-21 01:00:46 +03:00
idx = 0
2024-04-22 14:48:42 +03:00
length := objectFormat . FullLength ( ) / 2
for idx < length {
2022-06-20 13:02:49 +03:00
var read int
2024-04-22 14:48:42 +03:00
read , err = rd . Read ( shaBuf [ idx : length ] )
2020-12-17 17:00:47 +03:00
n += read
if err != nil {
2023-07-09 14:58:06 +03:00
return mode , fname , sha , n , err
2020-12-17 17:00:47 +03:00
}
idx += read
}
sha = shaBuf
2022-06-20 13:02:49 +03:00
return mode , fname , sha , n , err
2020-12-17 17:00:47 +03:00
}
2021-09-18 03:54:15 +03:00
// callerPrefix is the path of this source file, as recorded by the compiler,
// with the trailing "modules/git/batch_reader.go" removed. It is trimmed from
// caller file names before they are put into command descriptions.
var callerPrefix string

// init derives callerPrefix from this file's own location.
func init() {
	if _, thisFile, _, ok := runtime.Caller(0); ok {
		callerPrefix = strings.TrimSuffix(thisFile, "modules/git/batch_reader.go")
	}
}
2024-02-22 06:48:19 +03:00
// DiscardFull discards exactly discard bytes from rd.
//
// bufio.Reader.Discard takes an int, so the request is issued in chunks of at
// most math.MaxInt32 bytes. Clamping every chunk (not just the first one)
// keeps the int conversion from truncating on platforms where int is 32 bits
// wide, however large discard is.
//
// A zero or negative discard is a no-op. If rd fails or ends before all bytes
// have been skipped, the error from Discard (e.g. io.EOF) is returned.
func DiscardFull(rd *bufio.Reader, discard int64) error {
	for discard > 0 {
		chunk := discard
		if chunk > math.MaxInt32 {
			chunk = math.MaxInt32
		}
		n, err := rd.Discard(int(chunk))
		discard -= int64(n)
		if err != nil {
			return err
		}
	}
	return nil
}