2019-02-12 16:07:31 +03:00
// Copyright 2019 The Gitea Authors. All rights reserved.
2022-11-27 21:20:29 +03:00
// SPDX-License-Identifier: MIT
2019-02-12 16:07:31 +03:00
2021-11-24 10:56:24 +03:00
package files
2019-02-12 16:07:31 +03:00
import (
2022-01-20 02:26:57 +03:00
"context"
2019-02-12 16:07:31 +03:00
"fmt"
"os"
"path"
"strings"
2022-06-12 18:51:54 +03:00
git_model "code.gitea.io/gitea/models/git"
2021-12-10 04:27:50 +03:00
repo_model "code.gitea.io/gitea/models/repo"
2021-11-24 12:49:20 +03:00
user_model "code.gitea.io/gitea/models/user"
2021-03-01 15:14:17 +03:00
"code.gitea.io/gitea/modules/git"
2019-02-12 16:07:31 +03:00
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/setting"
)
// UploadRepoFileOptions contains the uploaded repository file options.
type UploadRepoFileOptions struct {
	// LastCommitID is handed to CommitTree together with the new tree.
	// UploadRepoFiles resets it to "" when it has to initialise an empty repository.
	LastCommitID string
	// OldBranch is the branch cloned into the temporary repository.
	OldBranch string
	// NewBranch is the branch the resulting commit is pushed to.
	NewBranch string
	// TreePath is joined with each upload's name to form its path in the repository.
	TreePath string
	// Message is handed to CommitTree for the new commit.
	Message string
	// Files identifies the uploads to commit, in UUID format.
	Files []string
	// Signoff is forwarded to CommitTree unchanged.
	Signoff bool
}
type uploadInfo struct {
2022-08-25 05:31:57 +03:00
upload * repo_model . Upload
2022-06-12 18:51:54 +03:00
lfsMetaObject * git_model . LFSMetaObject
2019-02-12 16:07:31 +03:00
}
2023-09-15 09:13:19 +03:00
func cleanUpAfterFailure ( ctx context . Context , infos * [ ] uploadInfo , t * TemporaryUploadRepository , original error ) error {
2019-02-12 16:07:31 +03:00
for _ , info := range * infos {
if info . lfsMetaObject == nil {
continue
}
if ! info . lfsMetaObject . Existing {
2023-09-15 09:13:19 +03:00
if _ , err := git_model . RemoveLFSMetaObjectByOid ( ctx , t . repo . ID , info . lfsMetaObject . Oid ) ; err != nil {
2022-10-24 22:29:17 +03:00
original = fmt . Errorf ( "%w, %v" , original , err ) // We wrap the original error - as this is the underlying error that required the fallback
2019-02-12 16:07:31 +03:00
}
}
}
return original
}
// UploadRepoFiles uploads files to the given repository
2022-01-20 02:26:57 +03:00
func UploadRepoFiles ( ctx context . Context , repo * repo_model . Repository , doer * user_model . User , opts * UploadRepoFileOptions ) error {
2019-02-12 16:07:31 +03:00
if len ( opts . Files ) == 0 {
return nil
}
2023-09-15 09:13:19 +03:00
uploads , err := repo_model . GetUploadsByUUIDs ( ctx , opts . Files )
2019-02-12 16:07:31 +03:00
if err != nil {
2022-10-24 22:29:17 +03:00
return fmt . Errorf ( "GetUploadsByUUIDs [uuids: %v]: %w" , opts . Files , err )
2019-02-12 16:07:31 +03:00
}
2019-11-02 10:37:05 +03:00
names := make ( [ ] string , len ( uploads ) )
infos := make ( [ ] uploadInfo , len ( uploads ) )
for i , upload := range uploads {
// Check file is not lfs locked, will return nil if lock setting not enabled
filepath := path . Join ( opts . TreePath , upload . Name )
2023-01-09 06:50:54 +03:00
lfsLock , err := git_model . GetTreePathLock ( ctx , repo . ID , filepath )
2019-11-02 10:37:05 +03:00
if err != nil {
return err
}
if lfsLock != nil && lfsLock . OwnerID != doer . ID {
2022-12-03 05:48:26 +03:00
u , err := user_model . GetUserByID ( ctx , lfsLock . OwnerID )
2021-11-24 12:49:20 +03:00
if err != nil {
return err
}
2022-06-12 18:51:54 +03:00
return git_model . ErrLFSFileLocked { RepoID : repo . ID , Path : filepath , UserName : u . Name }
2019-11-02 10:37:05 +03:00
}
names [ i ] = upload . Name
infos [ i ] = uploadInfo { upload : upload }
}
2022-01-20 02:26:57 +03:00
t , err := NewTemporaryUploadRepository ( ctx , repo )
2019-02-12 16:07:31 +03:00
if err != nil {
return err
}
2019-06-12 22:41:28 +03:00
defer t . Close ( )
2023-04-19 16:40:42 +03:00
hasOldBranch := true
2024-01-16 18:06:51 +03:00
if err = t . Clone ( opts . OldBranch , true ) ; err != nil {
2023-04-19 16:40:42 +03:00
if ! git . IsErrBranchNotExist ( err ) || ! repo . IsEmpty {
return err
}
2023-12-17 14:56:08 +03:00
if err = t . Init ( repo . ObjectFormatName ) ; err != nil {
2023-04-19 16:40:42 +03:00
return err
}
hasOldBranch = false
opts . LastCommitID = ""
2019-02-12 16:07:31 +03:00
}
2023-04-19 16:40:42 +03:00
if hasOldBranch {
if err = t . SetDefaultIndex ( ) ; err != nil {
return err
}
2019-02-12 16:07:31 +03:00
}
2019-10-12 03:13:27 +03:00
var filename2attribute2info map [ string ] map [ string ] string
if setting . LFS . StartServer {
2021-03-01 15:14:17 +03:00
filename2attribute2info , err = t . gitRepo . CheckAttribute ( git . CheckAttributeOpts {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 05:30:43 +03:00
Attributes : [ ] string { "filter" } ,
2021-03-01 15:14:17 +03:00
Filenames : names ,
2022-01-18 10:44:30 +03:00
CachedOnly : true ,
2021-03-01 15:14:17 +03:00
} )
2019-10-12 03:13:27 +03:00
if err != nil {
return err
}
2019-02-12 16:07:31 +03:00
}
// Copy uploaded files into repository.
2021-03-20 19:30:29 +03:00
for i := range infos {
if err := copyUploadedLFSFileIntoRepository ( & infos [ i ] , filename2attribute2info , t , opts . TreePath ) ; err != nil {
2019-02-12 16:07:31 +03:00
return err
}
}
// Now write the tree
treeHash , err := t . WriteTree ( )
if err != nil {
return err
}
2019-04-17 19:06:35 +03:00
// make author and committer the doer
author := doer
committer := doer
2019-02-12 16:07:31 +03:00
// Now commit the tree
2022-03-28 22:48:41 +03:00
commitHash , err := t . CommitTree ( opts . LastCommitID , author , committer , treeHash , opts . Message , opts . Signoff )
2019-02-12 16:07:31 +03:00
if err != nil {
return err
}
// Now deal with LFS objects
2021-03-20 19:30:29 +03:00
for i := range infos {
if infos [ i ] . lfsMetaObject == nil {
2019-02-12 16:07:31 +03:00
continue
}
2023-12-07 10:27:36 +03:00
infos [ i ] . lfsMetaObject , err = git_model . NewLFSMetaObject ( ctx , infos [ i ] . lfsMetaObject . RepositoryID , infos [ i ] . lfsMetaObject . Pointer )
2019-02-12 16:07:31 +03:00
if err != nil {
// OK Now we need to cleanup
2023-09-15 09:13:19 +03:00
return cleanUpAfterFailure ( ctx , & infos , t , err )
2019-02-12 16:07:31 +03:00
}
// Don't move the files yet - we need to ensure that
// everything can be inserted first
}
// OK now we can insert the data into the store - there's no way to clean up the store
// once it's in there, it's in there.
2021-04-09 01:25:57 +03:00
contentStore := lfs . NewContentStore ( )
2021-03-20 01:49:29 +03:00
for _ , info := range infos {
if err := uploadToLFSContentStore ( info , contentStore ) ; err != nil {
2023-09-15 09:13:19 +03:00
return cleanUpAfterFailure ( ctx , & infos , t , err )
2020-09-08 18:45:10 +03:00
}
2019-02-12 16:07:31 +03:00
}
// Then push this tree to NewBranch
if err := t . Push ( doer , commitHash , opts . NewBranch ) ; err != nil {
return err
}
2023-09-15 09:13:19 +03:00
return repo_model . DeleteUploads ( ctx , uploads ... )
2019-02-12 16:07:31 +03:00
}
2021-03-20 01:49:29 +03:00
2021-03-20 19:30:29 +03:00
func copyUploadedLFSFileIntoRepository ( info * uploadInfo , filename2attribute2info map [ string ] map [ string ] string , t * TemporaryUploadRepository , treePath string ) error {
file , err := os . Open ( info . upload . LocalPath ( ) )
if err != nil {
return err
}
defer file . Close ( )
var objectHash string
if setting . LFS . StartServer && filename2attribute2info [ info . upload . Name ] != nil && filename2attribute2info [ info . upload . Name ] [ "filter" ] == "lfs" {
// Handle LFS
// FIXME: Inefficient! this should probably happen in models.Upload
2021-04-09 01:25:57 +03:00
pointer , err := lfs . GeneratePointer ( file )
2021-03-20 19:30:29 +03:00
if err != nil {
return err
}
2022-06-12 18:51:54 +03:00
info . lfsMetaObject = & git_model . LFSMetaObject { Pointer : pointer , RepositoryID : t . repo . ID }
2021-03-20 19:30:29 +03:00
2021-04-09 01:25:57 +03:00
if objectHash , err = t . HashObject ( strings . NewReader ( pointer . StringContent ( ) ) ) ; err != nil {
2021-03-20 19:30:29 +03:00
return err
}
} else if objectHash , err = t . HashObject ( file ) ; err != nil {
return err
}
// Add the object to the index
return t . AddObjectToIndex ( "100644" , objectHash , path . Join ( treePath , info . upload . Name ) )
}
2021-03-20 01:49:29 +03:00
func uploadToLFSContentStore ( info uploadInfo , contentStore * lfs . ContentStore ) error {
if info . lfsMetaObject == nil {
return nil
}
2021-04-09 01:25:57 +03:00
exist , err := contentStore . Exists ( info . lfsMetaObject . Pointer )
2021-03-20 01:49:29 +03:00
if err != nil {
return err
}
if ! exist {
file , err := os . Open ( info . upload . LocalPath ( ) )
if err != nil {
return err
}
defer file . Close ( )
// FIXME: Put regenerates the hash and copies the file over.
// I guess this strictly ensures the soundness of the store but this is inefficient.
2021-04-09 01:25:57 +03:00
if err := contentStore . Put ( info . lfsMetaObject . Pointer , file ) ; err != nil {
2021-03-20 01:49:29 +03:00
// OK Now we need to cleanup
// Can't clean up the store, once uploaded there they're there.
return err
}
}
return nil
}