2020-12-17 17:00:47 +03:00
// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2017 The Gitea Authors. All rights reserved.
2022-11-27 21:20:29 +03:00
// SPDX-License-Identifier: MIT
2020-12-17 17:00:47 +03:00
2021-08-24 19:47:09 +03:00
//go:build !gogit
2020-12-17 17:00:47 +03:00
package git
import (
2021-05-10 04:27:03 +03:00
"bufio"
"context"
2020-12-17 17:00:47 +03:00
"errors"
"path/filepath"
2021-06-25 19:54:08 +03:00
"code.gitea.io/gitea/modules/log"
2020-12-17 17:00:47 +03:00
)
2024-01-19 19:05:02 +03:00
// init clears the package-level isGogit flag for this build. This file is
// compiled only under the `!gogit` build tag, so the flag records that the
// gogit-based implementation is not the one in use.
func init ( ) {
isGogit = false
}
2020-12-17 17:00:47 +03:00
// Repository represents a Git repository.
type Repository struct {
Path string
tagCache * ObjectCache
gpgSettings * GPGSettings
2021-05-10 04:27:03 +03:00
Prevent double use of `git cat-file` session. (#29298)
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
(cherry picked from commit f74c869221624092999097af38b6f7fae4701420)
2024-02-21 21:54:17 +03:00
batchInUse bool
2021-05-10 04:27:03 +03:00
batchCancel context . CancelFunc
batchReader * bufio . Reader
batchWriter WriteCloserError
Prevent double use of `git cat-file` session. (#29298)
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
(cherry picked from commit f74c869221624092999097af38b6f7fae4701420)
2024-02-21 21:54:17 +03:00
checkInUse bool
2021-05-10 04:27:03 +03:00
checkCancel context . CancelFunc
checkReader * bufio . Reader
checkWriter WriteCloserError
2021-11-30 23:06:32 +03:00
2022-07-25 18:39:42 +03:00
Ctx context . Context
LastCommitCache * LastCommitCache
2023-12-14 00:02:00 +03:00
objectFormat ObjectFormat
2020-12-17 17:00:47 +03:00
}
2022-03-29 22:13:41 +03:00
// openRepositoryWithDefaultContext opens the repository at the given path with DefaultContext.
func openRepositoryWithDefaultContext ( repoPath string ) ( * Repository , error ) {
return OpenRepository ( DefaultContext , repoPath )
2021-11-30 23:06:32 +03:00
}
2022-03-29 22:13:41 +03:00
// OpenRepository opens the repository at the given path with the provided context.
func OpenRepository ( ctx context . Context , repoPath string ) ( * Repository , error ) {
2020-12-17 17:00:47 +03:00
repoPath , err := filepath . Abs ( repoPath )
if err != nil {
return nil , err
} else if ! isDir ( repoPath ) {
return nil , errors . New ( "no such file or directory" )
}
2021-05-10 04:27:03 +03:00
2021-12-16 22:01:14 +03:00
// Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
if err := EnsureValidGitRepository ( ctx , repoPath ) ; err != nil {
return nil , err
}
2021-05-10 04:27:03 +03:00
repo := & Repository {
2020-12-17 17:00:47 +03:00
Path : repoPath ,
tagCache : newObjectCache ( ) ,
2021-11-30 23:06:32 +03:00
Ctx : ctx ,
2021-05-10 04:27:03 +03:00
}
2021-11-30 23:06:32 +03:00
repo . batchWriter , repo . batchReader , repo . batchCancel = CatFileBatch ( ctx , repoPath )
2023-04-19 16:40:42 +03:00
repo . checkWriter , repo . checkReader , repo . checkCancel = CatFileBatchCheck ( ctx , repoPath )
2021-05-10 04:27:03 +03:00
return repo , nil
}
// CatFileBatch obtains a CatFileBatch for this repository
2021-11-30 23:06:32 +03:00
func ( repo * Repository ) CatFileBatch ( ctx context . Context ) ( WriteCloserError , * bufio . Reader , func ( ) ) {
Prevent double use of `git cat-file` session. (#29298)
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
(cherry picked from commit f74c869221624092999097af38b6f7fae4701420)
2024-02-21 21:54:17 +03:00
if repo . batchCancel == nil || repo . batchInUse {
2021-06-25 19:54:08 +03:00
log . Debug ( "Opening temporary cat file batch for: %s" , repo . Path )
2021-11-30 23:06:32 +03:00
return CatFileBatch ( ctx , repo . Path )
2021-05-10 04:27:03 +03:00
}
Prevent double use of `git cat-file` session. (#29298)
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
(cherry picked from commit f74c869221624092999097af38b6f7fae4701420)
2024-02-21 21:54:17 +03:00
repo . batchInUse = true
return repo . batchWriter , repo . batchReader , func ( ) {
repo . batchInUse = false
}
2021-05-10 04:27:03 +03:00
}
// CatFileBatchCheck obtains a CatFileBatchCheck for this repository
2021-11-30 23:06:32 +03:00
func ( repo * Repository ) CatFileBatchCheck ( ctx context . Context ) ( WriteCloserError , * bufio . Reader , func ( ) ) {
Prevent double use of `git cat-file` session. (#29298)
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
(cherry picked from commit f74c869221624092999097af38b6f7fae4701420)
2024-02-21 21:54:17 +03:00
if repo . checkCancel == nil || repo . checkInUse {
log . Debug ( "Opening temporary cat file batch-check for: %s" , repo . Path )
2021-11-30 23:06:32 +03:00
return CatFileBatchCheck ( ctx , repo . Path )
2021-05-10 04:27:03 +03:00
}
Prevent double use of `git cat-file` session. (#29298)
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
(cherry picked from commit f74c869221624092999097af38b6f7fae4701420)
2024-02-21 21:54:17 +03:00
repo . checkInUse = true
return repo . checkWriter , repo . checkReader , func ( ) {
repo . checkInUse = false
}
2020-12-17 17:00:47 +03:00
}
2024-02-25 16:05:23 +03:00
func ( repo * Repository ) Close ( ) error {
2021-05-10 04:27:03 +03:00
if repo == nil {
2023-07-09 14:58:06 +03:00
return nil
2021-05-10 04:27:03 +03:00
}
if repo . batchCancel != nil {
repo . batchCancel ( )
repo . batchReader = nil
repo . batchWriter = nil
repo . batchCancel = nil
Prevent double use of `git cat-file` session. (#29298)
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
(cherry picked from commit f74c869221624092999097af38b6f7fae4701420)
2024-02-21 21:54:17 +03:00
repo . batchInUse = false
2021-05-10 04:27:03 +03:00
}
if repo . checkCancel != nil {
repo . checkCancel ( )
repo . checkCancel = nil
repo . checkReader = nil
repo . checkWriter = nil
Prevent double use of `git cat-file` session. (#29298)
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
(cherry picked from commit f74c869221624092999097af38b6f7fae4701420)
2024-02-21 21:54:17 +03:00
repo . checkInUse = false
2021-05-10 04:27:03 +03:00
}
2022-07-25 18:39:42 +03:00
repo . LastCommitCache = nil
repo . tagCache = nil
2024-02-25 16:05:23 +03:00
return nil
2020-12-17 17:00:47 +03:00
}