2021-04-09 00:25:57 +02:00
// Copyright 2021 The Gitea Authors. All rights reserved.
2022-11-27 13:20:29 -05:00
// SPDX-License-Identifier: MIT
2021-04-09 00:25:57 +02:00
package lfs
import (
"bufio"
"context"
"io"
"strconv"
2021-04-22 00:31:59 +08:00
"strings"
2021-04-09 00:25:57 +02:00
"sync"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/pipeline"
)
// SearchPointerBlobs scans the whole repository for LFS pointer files
func SearchPointerBlobs ( ctx context . Context , repo * git . Repository , pointerChan chan <- PointerBlob , errChan chan <- error ) {
basePath := repo . Path
catFileCheckReader , catFileCheckWriter := io . Pipe ( )
shasToBatchReader , shasToBatchWriter := io . Pipe ( )
catFileBatchReader , catFileBatchWriter := io . Pipe ( )
wg := sync . WaitGroup { }
wg . Add ( 4 )
// Create the go-routines in reverse order.
// 4. Take the output of cat-file --batch and check if each file in turn
// to see if they're pointers to files in the LFS store
go createPointerResultsFromCatFileBatch ( ctx , catFileBatchReader , & wg , pointerChan )
// 3. Take the shas of the blobs and batch read them
2022-01-19 23:26:57 +00:00
go pipeline . CatFileBatch ( ctx , shasToBatchReader , catFileBatchWriter , & wg , basePath )
2021-04-09 00:25:57 +02:00
// 2. From the provided objects restrict to blobs <=1k
go pipeline . BlobsLessThan1024FromCatFileBatchCheck ( catFileCheckReader , shasToBatchWriter , & wg )
// 1. Run batch-check on all objects in the repository
if git . CheckGitVersionAtLeast ( "2.6.0" ) != nil {
revListReader , revListWriter := io . Pipe ( )
shasToCheckReader , shasToCheckWriter := io . Pipe ( )
wg . Add ( 2 )
2022-01-19 23:26:57 +00:00
go pipeline . CatFileBatchCheck ( ctx , shasToCheckReader , catFileCheckWriter , & wg , basePath )
2021-04-09 00:25:57 +02:00
go pipeline . BlobsFromRevListObjects ( revListReader , shasToCheckWriter , & wg )
2022-01-19 23:26:57 +00:00
go pipeline . RevListAllObjects ( ctx , revListWriter , & wg , basePath , errChan )
2021-04-09 00:25:57 +02:00
} else {
2022-01-19 23:26:57 +00:00
go pipeline . CatFileBatchCheckAllObjects ( ctx , catFileCheckWriter , & wg , basePath , errChan )
2021-04-09 00:25:57 +02:00
}
wg . Wait ( )
close ( pointerChan )
close ( errChan )
}
func createPointerResultsFromCatFileBatch ( ctx context . Context , catFileBatchReader * io . PipeReader , wg * sync . WaitGroup , pointerChan chan <- PointerBlob ) {
defer wg . Done ( )
defer catFileBatchReader . Close ( )
bufferedReader := bufio . NewReader ( catFileBatchReader )
buf := make ( [ ] byte , 1025 )
loop :
for {
select {
case <- ctx . Done ( ) :
break loop
default :
}
// File descriptor line: sha
sha , err := bufferedReader . ReadString ( ' ' )
if err != nil {
_ = catFileBatchReader . CloseWithError ( err )
break
}
2021-04-22 00:31:59 +08:00
sha = strings . TrimSpace ( sha )
2021-04-09 00:25:57 +02:00
// Throw away the blob
if _ , err := bufferedReader . ReadString ( ' ' ) ; err != nil {
_ = catFileBatchReader . CloseWithError ( err )
break
}
sizeStr , err := bufferedReader . ReadString ( '\n' )
if err != nil {
_ = catFileBatchReader . CloseWithError ( err )
break
}
size , err := strconv . Atoi ( sizeStr [ : len ( sizeStr ) - 1 ] )
if err != nil {
_ = catFileBatchReader . CloseWithError ( err )
break
}
pointerBuf := buf [ : size + 1 ]
if _ , err := io . ReadFull ( bufferedReader , pointerBuf ) ; err != nil {
_ = catFileBatchReader . CloseWithError ( err )
break
}
pointerBuf = pointerBuf [ : size ]
// Now we need to check if the pointerBuf is an LFS pointer
pointer , _ := ReadPointerFromBuffer ( pointerBuf )
if ! pointer . IsValid ( ) {
continue
}
pointerChan <- PointerBlob { Hash : sha , Pointer : pointer }
}
}