2016-11-03 23:16:01 +01:00
// Copyright 2015 The Gogs Authors. All rights reserved.
2019-06-07 22:29:29 +02:00
// Copyright 2019 The Gitea Authors. All rights reserved.
2016-11-03 23:16:01 +01:00
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package git
import (
2021-12-23 09:32:29 +01:00
"bufio"
2020-05-26 06:58:07 +01:00
"bytes"
2022-01-19 23:26:57 +00:00
"context"
2021-12-23 09:32:29 +01:00
"errors"
2016-11-03 23:16:01 +01:00
"fmt"
2018-01-07 14:10:20 +01:00
"io"
2021-12-23 09:32:29 +01:00
"os"
"path/filepath"
2020-05-26 06:58:07 +01:00
"regexp"
2016-11-03 23:16:01 +01:00
"strconv"
"strings"
"time"
2019-06-12 21:41:28 +02:00
logger "code.gitea.io/gitea/modules/log"
2016-11-03 23:16:01 +01:00
)
2019-06-07 22:29:29 +02:00
// CompareInfo represents needed information for comparing references.
type CompareInfo struct {
2021-02-16 15:39:45 +00:00
MergeBase string
BaseCommitID string
HeadCommitID string
2021-08-09 20:08:51 +02:00
Commits [ ] * Commit
2021-02-16 15:39:45 +00:00
NumFiles int
2016-11-03 23:16:01 +01:00
}
2019-06-12 01:32:08 +02:00
// GetMergeBase checks and returns merge base of two branches and the reference used as base.
2021-12-20 05:41:31 +01:00
func ( repo * Repository ) GetMergeBase ( tmpRemote , base , head string ) ( string , string , error ) {
2019-06-07 22:29:29 +02:00
if tmpRemote == "" {
tmpRemote = "origin"
}
if tmpRemote != "origin" {
2021-12-02 08:28:08 +01:00
tmpBaseName := RemotePrefix + tmpRemote + "/tmp_" + base
2019-06-07 22:29:29 +02:00
// Fetch commit into a temporary branch in order to be able to handle commits and tags
2022-04-01 10:55:30 +08:00
_ , _ , err := NewCommand ( repo . Ctx , "fetch" , tmpRemote , base + ":" + tmpBaseName ) . RunStdString ( & RunOpts { Dir : repo . Path } )
2019-06-07 22:29:29 +02:00
if err == nil {
base = tmpBaseName
}
}
2022-04-01 10:55:30 +08:00
stdout , _ , err := NewCommand ( repo . Ctx , "merge-base" , "--" , base , head ) . RunStdString ( & RunOpts { Dir : repo . Path } )
2019-06-12 01:32:08 +02:00
return strings . TrimSpace ( stdout ) , base , err
2016-11-03 23:16:01 +01:00
}
2019-06-07 22:29:29 +02:00
// GetCompareInfo generates and returns compare information between base and head branches of repositories.
2021-10-15 17:05:33 +01:00
func ( repo * Repository ) GetCompareInfo ( basePath , baseBranch , headBranch string , directComparison , fileOnly bool ) ( _ * CompareInfo , err error ) {
2019-06-07 22:29:29 +02:00
var (
remoteBranch string
tmpRemote string
)
2016-11-03 23:16:01 +01:00
// We don't need a temporary remote for same repository.
if repo . Path != basePath {
// Add a temporary remote
2019-06-07 22:29:29 +02:00
tmpRemote = strconv . FormatInt ( time . Now ( ) . UnixNano ( ) , 10 )
2019-08-13 16:30:44 +08:00
if err = repo . AddRemote ( tmpRemote , basePath , false ) ; err != nil {
2016-11-03 23:16:01 +01:00
return nil , fmt . Errorf ( "AddRemote: %v" , err )
}
2019-06-12 21:41:28 +02:00
defer func ( ) {
if err := repo . RemoveRemote ( tmpRemote ) ; err != nil {
logger . Error ( "GetPullRequestInfo: RemoveRemote: %v" , err )
}
} ( )
2016-11-03 23:16:01 +01:00
}
2019-06-07 22:29:29 +02:00
compareInfo := new ( CompareInfo )
2021-02-16 15:39:45 +00:00
2022-01-19 23:26:57 +00:00
compareInfo . HeadCommitID , err = GetFullCommitID ( repo . Ctx , repo . Path , headBranch )
2021-02-16 15:39:45 +00:00
if err != nil {
compareInfo . HeadCommitID = headBranch
}
2019-06-12 01:32:08 +02:00
compareInfo . MergeBase , remoteBranch , err = repo . GetMergeBase ( tmpRemote , baseBranch , headBranch )
2019-04-09 21:45:58 +01:00
if err == nil {
2022-01-19 23:26:57 +00:00
compareInfo . BaseCommitID , err = GetFullCommitID ( repo . Ctx , repo . Path , remoteBranch )
2021-02-16 15:39:45 +00:00
if err != nil {
compareInfo . BaseCommitID = remoteBranch
}
2021-09-27 13:19:34 +01:00
separator := "..."
baseCommitID := compareInfo . MergeBase
if directComparison {
separator = ".."
baseCommitID = compareInfo . BaseCommitID
}
2020-07-29 18:53:04 +01:00
// We have a common base - therefore we know that ... should work
2021-10-15 17:05:33 +01:00
if ! fileOnly {
2022-04-01 10:55:30 +08:00
var logs [ ] byte
logs , _ , err = NewCommand ( repo . Ctx , "log" , baseCommitID + separator + headBranch , prettyLogFormat ) . RunStdBytes ( & RunOpts { Dir : repo . Path } )
2021-10-15 17:05:33 +01:00
if err != nil {
return nil , err
}
compareInfo . Commits , err = repo . parsePrettyFormatLogToList ( logs )
if err != nil {
return nil , fmt . Errorf ( "parsePrettyFormatLogToList: %v" , err )
}
} else {
compareInfo . Commits = [ ] * Commit { }
2019-04-09 21:45:58 +01:00
}
} else {
2021-08-09 20:08:51 +02:00
compareInfo . Commits = [ ] * Commit { }
2022-01-19 23:26:57 +00:00
compareInfo . MergeBase , err = GetFullCommitID ( repo . Ctx , repo . Path , remoteBranch )
2019-04-09 21:45:58 +01:00
if err != nil {
2019-06-07 22:29:29 +02:00
compareInfo . MergeBase = remoteBranch
2019-04-09 21:45:58 +01:00
}
2021-02-16 15:39:45 +00:00
compareInfo . BaseCommitID = compareInfo . MergeBase
2016-11-03 23:16:01 +01:00
}
// Count number of changed files.
2020-05-26 06:58:07 +01:00
// This probably should be removed as we need to use shortstat elsewhere
// Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
2021-09-27 13:19:34 +01:00
compareInfo . NumFiles , err = repo . GetDiffNumChangedFiles ( remoteBranch , headBranch , directComparison )
2016-11-03 23:16:01 +01:00
if err != nil {
return nil , err
}
2019-06-07 22:29:29 +02:00
return compareInfo , nil
2016-11-03 23:16:01 +01:00
}
2020-05-26 06:58:07 +01:00
// lineCountWriter is an io.Writer that discards its input and only keeps a
// running count of the NUL bytes it has seen. With NUL-terminated git output
// (`-z`) that count equals the number of emitted entries.
type lineCountWriter struct {
	numLines int
}

// Write adds the number of NUL bytes in chunk to the running total. It always
// reports the whole chunk as written and never returns an error.
func (w *lineCountWriter) Write(chunk []byte) (int, error) {
	w.numLines += bytes.Count(chunk, []byte{0})
	return len(chunk), nil
}
// GetDiffNumChangedFiles counts the number of changed files
// This is substantially quicker than shortstat but...
2021-09-27 13:19:34 +01:00
func ( repo * Repository ) GetDiffNumChangedFiles ( base , head string , directComparison bool ) ( int , error ) {
2020-05-26 06:58:07 +01:00
// Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
w := & lineCountWriter { }
stderr := new ( bytes . Buffer )
2021-09-27 13:19:34 +01:00
separator := "..."
if directComparison {
separator = ".."
}
2022-02-06 20:01:47 +01:00
if err := NewCommand ( repo . Ctx , "diff" , "-z" , "--name-only" , base + separator + head ) .
2022-04-01 10:55:30 +08:00
Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
Stderr : stderr ,
2022-02-11 13:47:22 +01:00
} ) ; err != nil {
2020-07-29 18:53:04 +01:00
if strings . Contains ( stderr . String ( ) , "no merge base" ) {
// git >= 2.28 now returns an error if base and head have become unrelated.
// previously it would return the results of git diff -z --name-only base head so let's try that...
w = & lineCountWriter { }
stderr . Reset ( )
2022-04-01 10:55:30 +08:00
if err = NewCommand ( repo . Ctx , "diff" , "-z" , "--name-only" , base , head ) . Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
Stderr : stderr ,
2022-02-11 13:47:22 +01:00
} ) ; err == nil {
2020-07-29 18:53:04 +01:00
return w . numLines , nil
}
}
2020-05-26 06:58:07 +01:00
return 0 , fmt . Errorf ( "%v: Stderr: %s" , err , stderr )
}
return w . numLines , nil
}
// GetDiffShortStat counts number of changed files, number of additions and deletions
func ( repo * Repository ) GetDiffShortStat ( base , head string ) ( numFiles , totalAdditions , totalDeletions int , err error ) {
2022-01-19 23:26:57 +00:00
numFiles , totalAdditions , totalDeletions , err = GetDiffShortStat ( repo . Ctx , repo . Path , base + "..." + head )
2020-07-29 18:53:04 +01:00
if err != nil && strings . Contains ( err . Error ( ) , "no merge base" ) {
2022-01-19 23:26:57 +00:00
return GetDiffShortStat ( repo . Ctx , repo . Path , base , head )
2020-07-29 18:53:04 +01:00
}
return
2020-05-26 06:58:07 +01:00
}
// GetDiffShortStat counts number of changed files, number of additions and deletions
2022-01-19 23:26:57 +00:00
func GetDiffShortStat ( ctx context . Context , repoPath string , args ... string ) ( numFiles , totalAdditions , totalDeletions int , err error ) {
2020-05-26 06:58:07 +01:00
// Now if we call:
// $ git diff --shortstat 1ebb35b98889ff77299f24d82da426b434b0cca0...788b8b1440462d477f45b0088875
// we get:
// " 9902 files changed, 2034198 insertions(+), 298800 deletions(-)\n"
args = append ( [ ] string {
"diff" ,
"--shortstat" ,
} , args ... )
2022-04-01 10:55:30 +08:00
stdout , _ , err := NewCommand ( ctx , args ... ) . RunStdString ( & RunOpts { Dir : repoPath } )
2020-05-26 06:58:07 +01:00
if err != nil {
return 0 , 0 , 0 , err
}
return parseDiffStat ( stdout )
}
// shortStatFormat matches the summary line printed by `git diff --shortstat`,
// e.g. " 3 files changed, 10 insertions(+), 2 deletions(-)". Group 1 is the
// file count; groups 2 and 3 are the insertion and deletion counts and are
// empty when git omits the corresponding part.
//
// The pattern must not be padded with spaces inside the raw string: a trailing
// literal space would never match, because git ends the line with "\n".
var shortStatFormat = regexp.MustCompile(
	`\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`)

// patchCommits captures the word that follows "From " at the very start of a
// patch's first line (the commit ID in `git format-patch` output). The anchor
// has to be the first character of the pattern, otherwise nothing can match.
var patchCommits = regexp.MustCompile(`^From\s(\w+)\s`)
2020-05-26 06:58:07 +01:00
// parseDiffStat extracts the file, insertion and deletion counts from the
// output of `git diff --shortstat`.
//
// Empty output (or a lone newline) means nothing changed and yields all
// zeroes. Insertion and deletion counts that git left out stay zero. An error
// is returned when the text does not match shortStatFormat or a captured
// number cannot be converted.
func parseDiffStat(stdout string) (numFiles, totalAdditions, totalDeletions int, err error) {
	switch stdout {
	case "", "\n":
		return 0, 0, 0, nil
	}

	match := shortStatFormat.FindStringSubmatch(stdout)
	if len(match) != 4 {
		return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s groups: %s", stdout, match)
	}

	if numFiles, err = strconv.Atoi(match[1]); err != nil {
		return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumFiles %v", stdout, err)
	}

	if added := match[2]; added != "" {
		if totalAdditions, err = strconv.Atoi(added); err != nil {
			return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumAdditions %v", stdout, err)
		}
	}

	if removed := match[3]; removed != "" {
		if totalDeletions, err = strconv.Atoi(removed); err != nil {
			return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumDeletions %v", stdout, err)
		}
	}

	return numFiles, totalAdditions, totalDeletions, nil
}
2019-12-13 22:21:06 +00:00
// GetDiffOrPatch generates either diff or formatted patch data between given revisions
2021-09-27 23:09:49 +02:00
func ( repo * Repository ) GetDiffOrPatch ( base , head string , w io . Writer , patch , binary bool ) error {
if patch {
2019-12-13 22:21:06 +00:00
return repo . GetPatch ( base , head , w )
}
2021-09-27 23:09:49 +02:00
if binary {
return repo . GetDiffBinary ( base , head , w )
}
2019-12-13 22:21:06 +00:00
return repo . GetDiff ( base , head , w )
2016-11-03 23:16:01 +01:00
}
2018-01-07 14:10:20 +01:00
2021-09-27 23:09:49 +02:00
// GetDiff generates and returns patch data between given revisions, optimized for human readability
2019-12-13 22:21:06 +00:00
func ( repo * Repository ) GetDiff ( base , head string , w io . Writer ) error {
2022-04-01 10:55:30 +08:00
return NewCommand ( repo . Ctx , "diff" , "-p" , base , head ) . Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
2022-02-11 13:47:22 +01:00
} )
2021-09-27 23:09:49 +02:00
}
// GetDiffBinary generates and returns patch data between given revisions, including binary diffs.
func ( repo * Repository ) GetDiffBinary ( base , head string , w io . Writer ) error {
2021-12-19 04:19:25 +00:00
if CheckGitVersionAtLeast ( "1.7.7" ) == nil {
2022-04-01 10:55:30 +08:00
return NewCommand ( repo . Ctx , "diff" , "-p" , "--binary" , "--histogram" , base , head ) . Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
2022-02-11 13:47:22 +01:00
} )
2021-12-19 04:19:25 +00:00
}
2022-04-01 10:55:30 +08:00
return NewCommand ( repo . Ctx , "diff" , "-p" , "--binary" , "--patience" , base , head ) . Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
2022-02-11 13:47:22 +01:00
} )
2019-12-13 22:21:06 +00:00
}
2018-01-07 14:10:20 +01:00
2021-09-27 23:09:49 +02:00
// GetPatch generates and returns format-patch data between given revisions, able to be used with `git apply`
2019-12-13 22:21:06 +00:00
func ( repo * Repository ) GetPatch ( base , head string , w io . Writer ) error {
2020-07-29 18:53:04 +01:00
stderr := new ( bytes . Buffer )
2022-02-06 20:01:47 +01:00
err := NewCommand ( repo . Ctx , "format-patch" , "--binary" , "--stdout" , base + "..." + head ) .
2022-04-01 10:55:30 +08:00
Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
Stderr : stderr ,
2022-02-11 13:47:22 +01:00
} )
2020-07-29 18:53:04 +01:00
if err != nil && bytes . Contains ( stderr . Bytes ( ) , [ ] byte ( "no merge base" ) ) {
2022-02-06 20:01:47 +01:00
return NewCommand ( repo . Ctx , "format-patch" , "--binary" , "--stdout" , base , head ) .
2022-04-01 10:55:30 +08:00
Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
2022-02-11 13:47:22 +01:00
} )
2020-07-29 18:53:04 +01:00
}
return err
2018-01-07 14:10:20 +01:00
}
2020-01-09 02:47:45 +01:00
2022-05-07 20:28:10 +02:00
// GetFilesChangedBetween returns the paths of all files that differ between
// the commits base and head (`git diff --name-only base..head`).
//
// The names are requested NUL-terminated (`-z`) so that paths containing
// unusual characters are reported verbatim instead of quoted. The result is
// empty, not a slice holding one empty string, when nothing changed.
func (repo *Repository) GetFilesChangedBetween(base, head string) ([]string, error) {
	stdout, _, err := NewCommand(repo.Ctx, "diff", "--name-only", "-z", base+".."+head).RunStdString(&RunOpts{Dir: repo.Path})
	if err != nil {
		return nil, err
	}

	files := strings.Split(stdout, "\x00")
	// git terminates every name with NUL, so the final element of the split
	// is always an empty string that does not correspond to a file.
	if n := len(files); n > 0 && files[n-1] == "" {
		files = files[:n-1]
	}
	return files, nil
}
2020-01-09 02:47:45 +01:00
// GetDiffFromMergeBase generates and return patch data from merge base to head
func ( repo * Repository ) GetDiffFromMergeBase ( base , head string , w io . Writer ) error {
2020-07-29 18:53:04 +01:00
stderr := new ( bytes . Buffer )
2022-02-06 20:01:47 +01:00
err := NewCommand ( repo . Ctx , "diff" , "-p" , "--binary" , base + "..." + head ) .
2022-04-01 10:55:30 +08:00
Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
Stderr : stderr ,
2022-02-11 13:47:22 +01:00
} )
2020-07-29 18:53:04 +01:00
if err != nil && bytes . Contains ( stderr . Bytes ( ) , [ ] byte ( "no merge base" ) ) {
2021-09-27 23:09:49 +02:00
return repo . GetDiffBinary ( base , head , w )
2020-07-29 18:53:04 +01:00
}
return err
2020-01-09 02:47:45 +01:00
}
2021-12-23 09:32:29 +01:00
// ReadPatchCommit opens the stored patch of pull request prID
// ("pulls/<prID>.patch" below the repository path) and returns the commit ID
// found on its first line.
//
// An error is returned when the file cannot be opened or read, or when the
// first line does not match the patchCommits pattern.
func (repo *Repository) ReadPatchCommit(prID int64) (commitSHA string, err error) {
	// Migrated repositories download patches to "pulls" location
	patchFile := fmt.Sprintf("pulls/%d.patch", prID)
	loadPatch, err := os.Open(filepath.Join(repo.Path, patchFile))
	if err != nil {
		return "", err
	}
	defer loadPatch.Close()

	// Read only the first line of the patch - usually it contains the first commit made in patch
	scanner := bufio.NewScanner(loadPatch)
	if !scanner.Scan() {
		// Distinguish a genuine read failure from a file that is merely empty.
		if scanErr := scanner.Err(); scanErr != nil {
			return "", fmt.Errorf("reading %s: %w", patchFile, scanErr)
		}
		return "", errors.New("patch file doesn't contain valid commit ID")
	}

	// Parse the Patch stats, sometimes Migration returns a 404 for the patch file
	commitSHAGroups := patchCommits.FindStringSubmatch(scanner.Text())
	if len(commitSHAGroups) == 0 {
		return "", errors.New("patch file doesn't contain valid commit ID")
	}
	return commitSHAGroups[1], nil
}