2016-11-04 01:16:01 +03:00
// Copyright 2015 The Gogs Authors. All rights reserved.
2019-06-07 23:29:29 +03:00
// Copyright 2019 The Gitea Authors. All rights reserved.
2016-11-04 01:16:01 +03:00
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package git
import (
2021-12-23 11:32:29 +03:00
"bufio"
2020-05-26 08:58:07 +03:00
"bytes"
2022-01-20 02:26:57 +03:00
"context"
2021-12-23 11:32:29 +03:00
"errors"
2016-11-04 01:16:01 +03:00
"fmt"
2018-01-07 16:10:20 +03:00
"io"
2021-12-23 11:32:29 +03:00
"os"
"path/filepath"
2020-05-26 08:58:07 +03:00
"regexp"
2016-11-04 01:16:01 +03:00
"strconv"
"strings"
"time"
2019-06-12 22:41:28 +03:00
logger "code.gitea.io/gitea/modules/log"
2016-11-04 01:16:01 +03:00
)
2019-06-07 23:29:29 +03:00
// CompareInfo represents needed information for comparing references.
type CompareInfo struct {
2021-02-16 18:39:45 +03:00
MergeBase string
BaseCommitID string
HeadCommitID string
2021-08-09 21:08:51 +03:00
Commits [ ] * Commit
2021-02-16 18:39:45 +03:00
NumFiles int
2016-11-04 01:16:01 +03:00
}
2019-06-12 02:32:08 +03:00
// GetMergeBase checks and returns merge base of two branches and the reference used as base.
2021-12-20 07:41:31 +03:00
func ( repo * Repository ) GetMergeBase ( tmpRemote , base , head string ) ( string , string , error ) {
2019-06-07 23:29:29 +03:00
if tmpRemote == "" {
tmpRemote = "origin"
}
if tmpRemote != "origin" {
2021-12-02 10:28:08 +03:00
tmpBaseName := RemotePrefix + tmpRemote + "/tmp_" + base
2019-06-07 23:29:29 +03:00
// Fetch commit into a temporary branch in order to be able to handle commits and tags
2022-10-23 17:44:45 +03:00
_ , _ , err := NewCommand ( repo . Ctx , "fetch" , "--no-tags" ) . AddDynamicArguments ( tmpRemote ) . AddDashesAndList ( base + ":" + tmpBaseName ) . RunStdString ( & RunOpts { Dir : repo . Path } )
2019-06-07 23:29:29 +03:00
if err == nil {
base = tmpBaseName
}
}
2022-10-23 17:44:45 +03:00
stdout , _ , err := NewCommand ( repo . Ctx , "merge-base" ) . AddDashesAndList ( base , head ) . RunStdString ( & RunOpts { Dir : repo . Path } )
2019-06-12 02:32:08 +03:00
return strings . TrimSpace ( stdout ) , base , err
2016-11-04 01:16:01 +03:00
}
2019-06-07 23:29:29 +03:00
// GetCompareInfo generates and returns compare information between base and head branches of repositories.
2021-10-15 19:05:33 +03:00
func ( repo * Repository ) GetCompareInfo ( basePath , baseBranch , headBranch string , directComparison , fileOnly bool ) ( _ * CompareInfo , err error ) {
2019-06-07 23:29:29 +03:00
var (
remoteBranch string
tmpRemote string
)
2016-11-04 01:16:01 +03:00
// We don't need a temporary remote for same repository.
if repo . Path != basePath {
// Add a temporary remote
2019-06-07 23:29:29 +03:00
tmpRemote = strconv . FormatInt ( time . Now ( ) . UnixNano ( ) , 10 )
2019-08-13 11:30:44 +03:00
if err = repo . AddRemote ( tmpRemote , basePath , false ) ; err != nil {
2022-10-24 22:29:17 +03:00
return nil , fmt . Errorf ( "AddRemote: %w" , err )
2016-11-04 01:16:01 +03:00
}
2019-06-12 22:41:28 +03:00
defer func ( ) {
if err := repo . RemoveRemote ( tmpRemote ) ; err != nil {
logger . Error ( "GetPullRequestInfo: RemoveRemote: %v" , err )
}
} ( )
2016-11-04 01:16:01 +03:00
}
2019-06-07 23:29:29 +03:00
compareInfo := new ( CompareInfo )
2021-02-16 18:39:45 +03:00
2022-01-20 02:26:57 +03:00
compareInfo . HeadCommitID , err = GetFullCommitID ( repo . Ctx , repo . Path , headBranch )
2021-02-16 18:39:45 +03:00
if err != nil {
compareInfo . HeadCommitID = headBranch
}
2019-06-12 02:32:08 +03:00
compareInfo . MergeBase , remoteBranch , err = repo . GetMergeBase ( tmpRemote , baseBranch , headBranch )
2019-04-09 23:45:58 +03:00
if err == nil {
2022-01-20 02:26:57 +03:00
compareInfo . BaseCommitID , err = GetFullCommitID ( repo . Ctx , repo . Path , remoteBranch )
2021-02-16 18:39:45 +03:00
if err != nil {
compareInfo . BaseCommitID = remoteBranch
}
2021-09-27 15:19:34 +03:00
separator := "..."
baseCommitID := compareInfo . MergeBase
if directComparison {
separator = ".."
baseCommitID = compareInfo . BaseCommitID
}
2020-07-29 20:53:04 +03:00
// We have a common base - therefore we know that ... should work
2021-10-15 19:05:33 +03:00
if ! fileOnly {
2022-04-01 05:55:30 +03:00
var logs [ ] byte
2022-10-23 17:44:45 +03:00
logs , _ , err = NewCommand ( repo . Ctx , "log" ) . AddDynamicArguments ( baseCommitID + separator + headBranch ) . AddArguments ( prettyLogFormat ) . RunStdBytes ( & RunOpts { Dir : repo . Path } )
2021-10-15 19:05:33 +03:00
if err != nil {
return nil , err
}
compareInfo . Commits , err = repo . parsePrettyFormatLogToList ( logs )
if err != nil {
2022-10-24 22:29:17 +03:00
return nil , fmt . Errorf ( "parsePrettyFormatLogToList: %w" , err )
2021-10-15 19:05:33 +03:00
}
} else {
compareInfo . Commits = [ ] * Commit { }
2019-04-09 23:45:58 +03:00
}
} else {
2021-08-09 21:08:51 +03:00
compareInfo . Commits = [ ] * Commit { }
2022-01-20 02:26:57 +03:00
compareInfo . MergeBase , err = GetFullCommitID ( repo . Ctx , repo . Path , remoteBranch )
2019-04-09 23:45:58 +03:00
if err != nil {
2019-06-07 23:29:29 +03:00
compareInfo . MergeBase = remoteBranch
2019-04-09 23:45:58 +03:00
}
2021-02-16 18:39:45 +03:00
compareInfo . BaseCommitID = compareInfo . MergeBase
2016-11-04 01:16:01 +03:00
}
// Count number of changed files.
2020-05-26 08:58:07 +03:00
// This probably should be removed as we need to use shortstat elsewhere
// Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
2021-09-27 15:19:34 +03:00
compareInfo . NumFiles , err = repo . GetDiffNumChangedFiles ( remoteBranch , headBranch , directComparison )
2016-11-04 01:16:01 +03:00
if err != nil {
return nil , err
}
2019-06-07 23:29:29 +03:00
return compareInfo , nil
2016-11-04 01:16:01 +03:00
}
2020-05-26 08:58:07 +03:00
// lineCountWriter is an io.Writer that discards its input and only keeps a
// running count of the NUL bytes it has seen.
type lineCountWriter struct {
	numLines int
}

// Write adds the number of NUL ('\000') bytes in p to the running count.
// It always reports all of p as written and never returns an error.
func (w *lineCountWriter) Write(p []byte) (n int, err error) {
	nul := []byte{0}
	w.numLines += bytes.Count(p, nul)
	return len(p), nil
}
// GetDiffNumChangedFiles counts the number of changed files
// This is substantially quicker than shortstat but...
2021-09-27 15:19:34 +03:00
func ( repo * Repository ) GetDiffNumChangedFiles ( base , head string , directComparison bool ) ( int , error ) {
2020-05-26 08:58:07 +03:00
// Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
w := & lineCountWriter { }
stderr := new ( bytes . Buffer )
2021-09-27 15:19:34 +03:00
separator := "..."
if directComparison {
separator = ".."
}
2022-10-23 17:44:45 +03:00
if err := NewCommand ( repo . Ctx , "diff" , "-z" , "--name-only" ) . AddDynamicArguments ( base + separator + head ) .
2022-04-01 05:55:30 +03:00
Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
Stderr : stderr ,
2022-02-11 15:47:22 +03:00
} ) ; err != nil {
2020-07-29 20:53:04 +03:00
if strings . Contains ( stderr . String ( ) , "no merge base" ) {
// git >= 2.28 now returns an error if base and head have become unrelated.
// previously it would return the results of git diff -z --name-only base head so let's try that...
w = & lineCountWriter { }
stderr . Reset ( )
2022-10-23 17:44:45 +03:00
if err = NewCommand ( repo . Ctx , "diff" , "-z" , "--name-only" ) . AddDynamicArguments ( base , head ) . Run ( & RunOpts {
2022-04-01 05:55:30 +03:00
Dir : repo . Path ,
Stdout : w ,
Stderr : stderr ,
2022-02-11 15:47:22 +03:00
} ) ; err == nil {
2020-07-29 20:53:04 +03:00
return w . numLines , nil
}
}
2022-10-24 22:29:17 +03:00
return 0 , fmt . Errorf ( "%w: Stderr: %s" , err , stderr )
2020-05-26 08:58:07 +03:00
}
return w . numLines , nil
}
// GetDiffShortStat counts number of changed files, number of additions and deletions
func ( repo * Repository ) GetDiffShortStat ( base , head string ) ( numFiles , totalAdditions , totalDeletions int , err error ) {
2022-10-23 17:44:45 +03:00
numFiles , totalAdditions , totalDeletions , err = GetDiffShortStat ( repo . Ctx , repo . Path , CmdArgCheck ( base + "..." + head ) )
2020-07-29 20:53:04 +03:00
if err != nil && strings . Contains ( err . Error ( ) , "no merge base" ) {
2022-10-23 17:44:45 +03:00
return GetDiffShortStat ( repo . Ctx , repo . Path , CmdArgCheck ( base ) , CmdArgCheck ( head ) )
2020-07-29 20:53:04 +03:00
}
2022-06-20 13:02:49 +03:00
return numFiles , totalAdditions , totalDeletions , err
2020-05-26 08:58:07 +03:00
}
// GetDiffShortStat counts number of changed files, number of additions and deletions
2022-10-23 17:44:45 +03:00
func GetDiffShortStat ( ctx context . Context , repoPath string , args ... CmdArg ) ( numFiles , totalAdditions , totalDeletions int , err error ) {
2020-05-26 08:58:07 +03:00
// Now if we call:
// $ git diff --shortstat 1ebb35b98889ff77299f24d82da426b434b0cca0...788b8b1440462d477f45b0088875
// we get:
// " 9902 files changed, 2034198 insertions(+), 298800 deletions(-)\n"
2022-10-23 17:44:45 +03:00
args = append ( [ ] CmdArg {
2020-05-26 08:58:07 +03:00
"diff" ,
"--shortstat" ,
} , args ... )
2022-04-01 05:55:30 +03:00
stdout , _ , err := NewCommand ( ctx , args ... ) . RunStdString ( & RunOpts { Dir : repoPath } )
2020-05-26 08:58:07 +03:00
if err != nil {
return 0 , 0 , 0 , err
}
return parseDiffStat ( stdout )
}
// shortStatFormat matches the summary line printed by `git diff --shortstat`,
// for example " 9902 files changed, 2034198 insertions(+), 298800 deletions(-)".
// Capture groups: 1 = files changed, 2 = insertions (optional),
// 3 = deletions (optional).
//
// The pattern must not carry any padding inside the raw string: whitespace in
// a regular expression is matched literally.
var shortStatFormat = regexp.MustCompile(
	`\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`)

// patchCommits matches a line that starts with "From <word> " and captures
// the word, which is used as the commit ID of a patch file's first line.
var patchCommits = regexp.MustCompile(`^From\s(\w+)\s`)

// parseDiffStat parses the output of `git diff --shortstat` into the number of
// changed files, added lines and deleted lines.
//
// Empty output (or a lone newline) means nothing changed and yields zeros.
// The insertions and deletions parts are optional in the summary; a missing
// part is reported as zero. An error is returned when the output does not
// match the expected summary format or a number cannot be parsed.
func parseDiffStat(stdout string) (numFiles, totalAdditions, totalDeletions int, err error) {
	if len(stdout) == 0 || stdout == "\n" {
		return 0, 0, 0, nil
	}

	groups := shortStatFormat.FindStringSubmatch(stdout)
	if len(groups) != 4 {
		return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s groups: %s", stdout, groups)
	}

	numFiles, err = strconv.Atoi(groups[1])
	if err != nil {
		return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumFiles %w", stdout, err)
	}

	// An unmatched optional group is the empty string.
	if len(groups[2]) != 0 {
		totalAdditions, err = strconv.Atoi(groups[2])
		if err != nil {
			return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumAdditions %w", stdout, err)
		}
	}

	if len(groups[3]) != 0 {
		totalDeletions, err = strconv.Atoi(groups[3])
		if err != nil {
			return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumDeletions %w", stdout, err)
		}
	}

	return numFiles, totalAdditions, totalDeletions, nil
}
2019-12-14 01:21:06 +03:00
// GetDiffOrPatch generates either diff or formatted patch data between given revisions
2021-09-28 00:09:49 +03:00
func ( repo * Repository ) GetDiffOrPatch ( base , head string , w io . Writer , patch , binary bool ) error {
if patch {
2019-12-14 01:21:06 +03:00
return repo . GetPatch ( base , head , w )
}
2021-09-28 00:09:49 +03:00
if binary {
return repo . GetDiffBinary ( base , head , w )
}
2019-12-14 01:21:06 +03:00
return repo . GetDiff ( base , head , w )
2016-11-04 01:16:01 +03:00
}
2018-01-07 16:10:20 +03:00
2021-09-28 00:09:49 +03:00
// GetDiff generates and returns patch data between given revisions, optimized for human readability
2019-12-14 01:21:06 +03:00
func ( repo * Repository ) GetDiff ( base , head string , w io . Writer ) error {
2022-10-23 17:44:45 +03:00
return NewCommand ( repo . Ctx , "diff" , "-p" ) . AddDynamicArguments ( base , head ) . Run ( & RunOpts {
2022-04-01 05:55:30 +03:00
Dir : repo . Path ,
Stdout : w ,
2022-02-11 15:47:22 +03:00
} )
2021-09-28 00:09:49 +03:00
}
// GetDiffBinary generates and returns patch data between given revisions, including binary diffs.
func ( repo * Repository ) GetDiffBinary ( base , head string , w io . Writer ) error {
2022-10-23 17:44:45 +03:00
return NewCommand ( repo . Ctx , "diff" , "-p" , "--binary" , "--histogram" ) . AddDynamicArguments ( base , head ) . Run ( & RunOpts {
2022-04-01 05:55:30 +03:00
Dir : repo . Path ,
Stdout : w ,
2022-02-11 15:47:22 +03:00
} )
2019-12-14 01:21:06 +03:00
}
2018-01-07 16:10:20 +03:00
2021-09-28 00:09:49 +03:00
// GetPatch generates and returns format-patch data between given revisions, able to be used with `git apply`
2019-12-14 01:21:06 +03:00
func ( repo * Repository ) GetPatch ( base , head string , w io . Writer ) error {
2020-07-29 20:53:04 +03:00
stderr := new ( bytes . Buffer )
2022-10-23 17:44:45 +03:00
err := NewCommand ( repo . Ctx , "format-patch" , "--binary" , "--stdout" ) . AddDynamicArguments ( base + "..." + head ) .
2022-04-01 05:55:30 +03:00
Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
Stderr : stderr ,
2022-02-11 15:47:22 +03:00
} )
2020-07-29 20:53:04 +03:00
if err != nil && bytes . Contains ( stderr . Bytes ( ) , [ ] byte ( "no merge base" ) ) {
2022-10-23 17:44:45 +03:00
return NewCommand ( repo . Ctx , "format-patch" , "--binary" , "--stdout" ) . AddDynamicArguments ( base , head ) .
2022-04-01 05:55:30 +03:00
Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
2022-02-11 15:47:22 +03:00
} )
2020-07-29 20:53:04 +03:00
}
return err
2018-01-07 16:10:20 +03:00
}
2020-01-09 04:47:45 +03:00
2022-05-07 21:28:10 +03:00
// GetFilesChangedBetween returns a list of all files that have been changed between the given commits
func ( repo * Repository ) GetFilesChangedBetween ( base , head string ) ( [ ] string , error ) {
2022-10-23 17:44:45 +03:00
stdout , _ , err := NewCommand ( repo . Ctx , "diff" , "--name-only" ) . AddDynamicArguments ( base + ".." + head ) . RunStdString ( & RunOpts { Dir : repo . Path } )
2022-05-07 21:28:10 +03:00
if err != nil {
return nil , err
}
return strings . Split ( stdout , "\n" ) , err
}
2020-01-09 04:47:45 +03:00
// GetDiffFromMergeBase generates and return patch data from merge base to head
func ( repo * Repository ) GetDiffFromMergeBase ( base , head string , w io . Writer ) error {
2020-07-29 20:53:04 +03:00
stderr := new ( bytes . Buffer )
2022-10-23 17:44:45 +03:00
err := NewCommand ( repo . Ctx , "diff" , "-p" , "--binary" ) . AddDynamicArguments ( base + "..." + head ) .
2022-04-01 05:55:30 +03:00
Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
Stderr : stderr ,
2022-02-11 15:47:22 +03:00
} )
2020-07-29 20:53:04 +03:00
if err != nil && bytes . Contains ( stderr . Bytes ( ) , [ ] byte ( "no merge base" ) ) {
2021-09-28 00:09:49 +03:00
return repo . GetDiffBinary ( base , head , w )
2020-07-29 20:53:04 +03:00
}
return err
2020-01-09 04:47:45 +03:00
}
2021-12-23 11:32:29 +03:00
// ReadPatchCommit reads the stored patch file of the pull request prID
// ("pulls/<prID>.patch" below the repository path) and returns the commit ID
// found on its first line.
//
// It returns an error if the file cannot be opened or read, or if the first
// line does not have the form "From <commit ID> ...".
func (repo *Repository) ReadPatchCommit(prID int64) (commitSHA string, err error) {
	// Migrated repositories download patches to "pulls" location
	patchFile := fmt.Sprintf("pulls/%d.patch", prID)
	loadPatch, err := os.Open(filepath.Join(repo.Path, patchFile))
	if err != nil {
		return "", err
	}
	defer loadPatch.Close()

	// Read only the first line of the patch - usually it contains the first commit made in patch
	scanner := bufio.NewScanner(loadPatch)
	if !scanner.Scan() {
		// Scan returns false both at end of input and on failure; only a
		// failure (read error, over-long line) is reported here. An empty
		// file falls through and is rejected below as having no commit ID.
		if scanErr := scanner.Err(); scanErr != nil {
			return "", fmt.Errorf("reading %s: %w", patchFile, scanErr)
		}
	}

	// Parse the Patch stats, sometimes Migration returns a 404 for the patch file
	commitSHAGroups := patchCommits.FindStringSubmatch(scanner.Text())
	if len(commitSHAGroups) == 0 {
		return "", errors.New("patch file doesn't contain valid commit ID")
	}
	return commitSHAGroups[1], nil
}