2014-04-13 05:35:36 +04:00
// Copyright 2014 The Gogs Authors. All rights reserved.
2019-09-06 05:20:09 +03:00
// Copyright 2019 The Gitea Authors. All rights reserved.
2014-04-13 05:35:36 +04:00
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
2019-09-06 05:20:09 +03:00
package gitdiff
2014-04-13 05:35:36 +04:00
import (
"bufio"
2014-12-09 10:18:25 +03:00
"bytes"
2019-11-30 17:40:22 +03:00
"context"
2014-06-19 09:08:03 +04:00
"fmt"
2016-01-09 09:51:17 +03:00
"html/template"
2014-04-13 05:35:36 +04:00
"io"
2015-12-14 17:38:21 +03:00
"io/ioutil"
2019-11-15 05:52:59 +03:00
"net/url"
2014-04-13 05:35:36 +04:00
"os"
"os/exec"
2020-08-08 13:17:02 +03:00
"regexp"
2018-08-06 07:43:22 +03:00
"sort"
2014-04-13 05:35:36 +04:00
"strings"
2019-09-06 05:20:09 +03:00
"code.gitea.io/gitea/models"
2019-08-15 15:07:28 +03:00
"code.gitea.io/gitea/modules/charset"
2019-03-27 12:33:00 +03:00
"code.gitea.io/gitea/modules/git"
2016-12-06 20:58:31 +03:00
"code.gitea.io/gitea/modules/highlight"
2016-11-10 19:24:48 +03:00
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/setting"
2019-08-23 19:40:30 +03:00
2016-11-05 19:56:35 +03:00
"github.com/sergi/go-diff/diffmatchpatch"
2019-08-15 15:07:28 +03:00
stdcharset "golang.org/x/net/html/charset"
2016-11-05 19:56:35 +03:00
"golang.org/x/text/transform"
2014-04-13 05:35:36 +04:00
)
2016-11-24 11:30:08 +03:00
// DiffLineType identifies what kind of line a DiffLine is.
type DiffLineType uint8

// Possible DiffLineType values; numbering starts at 1 so the zero value is
// not a valid line type.
const (
	DiffLinePlain = DiffLineType(iota + 1)
	DiffLineAdd
	DiffLineDel
	DiffLineSection
)
2016-11-24 11:30:08 +03:00
// DiffFileType identifies what kind of change a DiffFile describes.
type DiffFileType uint8

// Possible DiffFileType values; numbering starts at 1 so the zero value is
// not a valid file type.
const (
	DiffFileAdd = DiffFileType(iota + 1)
	DiffFileChange
	DiffFileDel
	DiffFileRename
	DiffFileCopy
)
2019-11-15 05:52:59 +03:00
// DiffLineExpandDirection describes in which direction(s) a DiffLineSection
// line can be expanded.
type DiffLineExpandDirection uint8

// Possible DiffLineExpandDirection values; numbering starts at 1.
const (
	DiffLineExpandNone = DiffLineExpandDirection(iota + 1)
	DiffLineExpandSingle
	DiffLineExpandUpDown
	DiffLineExpandUp
	DiffLineExpandDown
)
2016-11-24 11:30:08 +03:00
// DiffLine represents a line difference in a DiffSection.
2014-04-13 05:35:36 +04:00
type DiffLine struct {
2019-11-15 05:52:59 +03:00
LeftIdx int
RightIdx int
Type DiffLineType
Content string
Comments [ ] * models . Comment
SectionInfo * DiffLineSectionInfo
}
// DiffLineSectionInfo holds the metadata of a diff section (hunk) header line.
type DiffLineSectionInfo struct {
	// Path is the path of the file the section belongs to.
	Path string
	// LastLeftIdx and LastRightIdx are the line numbers reached on each side
	// before this section starts.
	LastLeftIdx, LastRightIdx int
	// LeftIdx and RightIdx are the line numbers at which this section starts.
	LeftIdx, RightIdx int
	// LeftHunkSize and RightHunkSize are the hunk sizes parsed from the header.
	LeftHunkSize, RightHunkSize int
}
2020-08-20 17:53:06 +03:00
// BlobExcerptChunkSize represents the maximum number of lines of one blob excerpt.
// GetExpandDirection also uses it as the gap size above which a section can be
// expanded in both directions.
const BlobExcerptChunkSize = 20
2019-11-15 05:52:59 +03:00
2016-11-24 11:30:08 +03:00
// GetType returns the type of a DiffLine.
2016-01-06 23:00:40 +03:00
func ( d * DiffLine ) GetType ( ) int {
return int ( d . Type )
2014-04-13 05:35:36 +04:00
}
2018-08-06 07:43:22 +03:00
// CanComment reports whether a new comment thread may be started on this line:
// section header lines never accept comments, and other lines only while they
// have no comments yet.
func (d *DiffLine) CanComment() bool {
	if d.Type == DiffLineSection {
		return false
	}
	return len(d.Comments) == 0
}
// GetCommentSide returns the diff side of the first comment on this line, or
// the empty string when the line has no comments.
func (d *DiffLine) GetCommentSide() string {
	if len(d.Comments) > 0 {
		return d.Comments[0].DiffSide()
	}
	return ""
}
2019-06-24 23:23:52 +03:00
// GetLineTypeMarker returns the leading marker of the line (" ", "+" or "-").
// It returns the empty string when the content is empty or does not start
// with one of those markers.
func (d *DiffLine) GetLineTypeMarker() string {
	// Guard against empty content: indexing Content[0] would panic.
	if len(d.Content) == 0 {
		return ""
	}
	if strings.IndexByte(" +-", d.Content[0]) > -1 {
		return d.Content[0:1]
	}
	return ""
}
2019-11-15 05:52:59 +03:00
// GetBlobExcerptQuery builds the query string used to request a blob excerpt
// for this line from its SectionInfo. The path is query-escaped.
func (d *DiffLine) GetBlobExcerptQuery() string {
	info := d.SectionInfo
	escapedPath := url.QueryEscape(info.Path)
	return fmt.Sprintf(
		"last_left=%d&last_right=%d&"+
			"left=%d&right=%d&"+
			"left_hunk_size=%d&right_hunk_size=%d&"+
			"path=%s",
		info.LastLeftIdx, info.LastRightIdx,
		info.LeftIdx, info.RightIdx,
		info.LeftHunkSize, info.RightHunkSize,
		escapedPath)
}
// GetExpandDirection gets DiffLineExpandDirection
func ( d * DiffLine ) GetExpandDirection ( ) DiffLineExpandDirection {
if d . Type != DiffLineSection || d . SectionInfo == nil || d . SectionInfo . RightIdx - d . SectionInfo . LastRightIdx <= 1 {
return DiffLineExpandNone
}
if d . SectionInfo . LastLeftIdx <= 0 && d . SectionInfo . LastRightIdx <= 0 {
return DiffLineExpandUp
2020-08-20 17:53:06 +03:00
} else if d . SectionInfo . RightIdx - d . SectionInfo . LastRightIdx > BlobExcerptChunkSize && d . SectionInfo . RightHunkSize > 0 {
2019-11-15 05:52:59 +03:00
return DiffLineExpandUpDown
} else if d . SectionInfo . LeftHunkSize <= 0 && d . SectionInfo . RightHunkSize <= 0 {
return DiffLineExpandDown
}
return DiffLineExpandSingle
}
2020-01-23 20:28:15 +03:00
func getDiffLineSectionInfo ( treePath , line string , lastLeftIdx , lastRightIdx int ) * DiffLineSectionInfo {
2020-01-28 11:02:03 +03:00
leftLine , leftHunk , rightLine , righHunk := git . ParseDiffHunkString ( line )
2020-01-23 20:28:15 +03:00
2019-11-15 05:52:59 +03:00
return & DiffLineSectionInfo {
2020-01-23 20:28:15 +03:00
Path : treePath ,
2019-11-15 05:52:59 +03:00
LastLeftIdx : lastLeftIdx ,
LastRightIdx : lastRightIdx ,
LeftIdx : leftLine ,
RightIdx : rightLine ,
LeftHunkSize : leftHunk ,
RightHunkSize : righHunk ,
}
}
2019-06-27 00:35:07 +03:00
// getLineContent returns content unchanged, or a single newline when content
// is empty (an empty line still needs something to render for copy/paste).
func getLineContent(content string) string {
	if content == "" {
		return "\n"
	}
	return content
}
2016-11-24 11:30:08 +03:00
// DiffSection represents a section of a DiffFile.
2014-04-13 05:35:36 +04:00
type DiffSection struct {
2020-07-01 00:34:03 +03:00
FileName string
Name string
Lines [ ] * DiffLine
2014-04-13 05:35:36 +04:00
}
2016-01-09 09:51:17 +03:00
// HTML fragments wrapped around inline added/removed code.
var (
	addedCodePrefix   = []byte(`<span class="added-code">`)
	removedCodePrefix = []byte(`<span class="removed-code">`)
	codeTagSuffix     = []byte(`</span>`)
)

// trailingSpanRegex matches a partial or complete opening <span ...> tag at the
// very end of a string. The pattern must not carry padding: a character after
// the `$` anchor would make it impossible to match.
var trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`)

// entityRegex matches an unterminated HTML entity (e.g. "&am" or "&#3") at the
// very end of a string.
var entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`)
2020-10-22 01:14:44 +03:00
// shouldWriteInline reports whether diff is written out for a line of the
// given type: equal text always, inserted text only on added lines, and
// deleted text only on deleted lines.
func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool {
	switch diff.Type {
	case diffmatchpatch.DiffEqual:
		return true
	case diffmatchpatch.DiffInsert:
		return lineType == DiffLineAdd
	case diffmatchpatch.DiffDelete:
		return lineType == DiffLineDel
	}
	return false
}
2016-01-09 09:51:17 +03:00
2020-07-01 00:34:03 +03:00
func diffToHTML ( fileName string , diffs [ ] diffmatchpatch . Diff , lineType DiffLineType ) template . HTML {
2016-08-07 19:49:47 +03:00
buf := bytes . NewBuffer ( nil )
2020-10-22 01:14:44 +03:00
match := ""
for _ , diff := range diffs {
if shouldWriteInline ( diff , lineType ) {
if len ( match ) > 0 {
diff . Text = match + diff . Text
match = ""
2020-07-16 16:58:54 +03:00
}
2020-10-22 01:14:44 +03:00
// Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency.
2020-11-05 03:35:05 +03:00
// Since inline changes might split in the middle of a chroma span tag or HTML entity, make we manually put it back together
// before writing so we don't try insert added/removed code spans in the middle of one of those
// and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of
// a line ending with an incomplete HTML entity or partial/opening <span>.
// EX:
// diffs[{Type: dmp.DiffDelete, Text: "language</span><span "},
// {Type: dmp.DiffEqual, Text: "c"},
// {Type: dmp.DiffDelete, Text: "lass="p">}]
// After first iteration
// diffs[{Type: dmp.DiffDelete, Text: "language</span>"}, //write out
// {Type: dmp.DiffEqual, Text: "<span c"},
// {Type: dmp.DiffDelete, Text: "lass="p">,</span>}]
// After second iteration
// {Type: dmp.DiffEqual, Text: ""}, // write out
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}]
// Final
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}]
// end up writing <span class="removed-code"><span class="p">,</span></span>
// Instead of <span class="removed-code">lass="p",</span></span>
2020-10-22 01:14:44 +03:00
m := trailingSpanRegex . FindStringSubmatchIndex ( diff . Text )
2020-08-08 13:17:02 +03:00
if m != nil {
2020-10-22 01:14:44 +03:00
match = diff . Text [ m [ 0 ] : m [ 1 ] ]
diff . Text = strings . TrimSuffix ( diff . Text , match )
2020-07-16 16:58:54 +03:00
}
2020-11-05 03:35:05 +03:00
m = entityRegex . FindStringSubmatchIndex ( diff . Text )
if m != nil {
match = diff . Text [ m [ 0 ] : m [ 1 ] ]
diff . Text = strings . TrimSuffix ( diff . Text , match )
}
2020-10-22 01:14:44 +03:00
// Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it
if strings . HasPrefix ( diff . Text , "</span>" ) {
2020-07-16 16:58:54 +03:00
buf . WriteString ( "</span>" )
2020-10-22 01:14:44 +03:00
diff . Text = strings . TrimPrefix ( diff . Text , "</span>" )
2020-07-16 16:58:54 +03:00
}
2020-10-22 01:14:44 +03:00
// If we weren't able to fix it then this should avoid broken HTML by not inserting more spans below
// The previous/next diff section will contain the rest of the tag that is missing here
if strings . Count ( diff . Text , "<" ) != strings . Count ( diff . Text , ">" ) {
buf . WriteString ( diff . Text )
continue
2020-07-16 16:58:54 +03:00
}
2020-10-22 01:14:44 +03:00
}
switch {
case diff . Type == diffmatchpatch . DiffEqual :
buf . WriteString ( diff . Text )
case diff . Type == diffmatchpatch . DiffInsert && lineType == DiffLineAdd :
2016-01-09 09:51:17 +03:00
buf . Write ( addedCodePrefix )
2020-10-22 01:14:44 +03:00
buf . WriteString ( diff . Text )
2016-01-09 09:51:17 +03:00
buf . Write ( codeTagSuffix )
2020-10-22 01:14:44 +03:00
case diff . Type == diffmatchpatch . DiffDelete && lineType == DiffLineDel :
2016-01-09 09:51:17 +03:00
buf . Write ( removedCodePrefix )
2020-10-22 01:14:44 +03:00
buf . WriteString ( diff . Text )
2016-01-09 09:51:17 +03:00
buf . Write ( codeTagSuffix )
2016-01-04 00:26:46 +03:00
}
}
2016-01-09 09:51:17 +03:00
return template . HTML ( buf . Bytes ( ) )
2016-01-04 00:26:46 +03:00
}
2016-11-22 14:08:23 +03:00
// GetLine gets a specific line by type (add or del) and file line number
2016-01-08 15:50:25 +03:00
func ( diffSection * DiffSection ) GetLine ( lineType DiffLineType , idx int ) * DiffLine {
2016-08-07 19:49:47 +03:00
var (
difference = 0
addCount = 0
delCount = 0
matchDiffLine * DiffLine
)
LOOP :
2016-01-08 15:50:25 +03:00
for _ , diffLine := range diffSection . Lines {
2016-08-07 19:49:47 +03:00
switch diffLine . Type {
2016-11-07 19:24:59 +03:00
case DiffLineAdd :
2016-08-07 19:49:47 +03:00
addCount ++
2016-11-07 19:24:59 +03:00
case DiffLineDel :
2016-08-07 19:49:47 +03:00
delCount ++
default :
if matchDiffLine != nil {
break LOOP
}
2016-01-08 15:50:25 +03:00
difference = diffLine . RightIdx - diffLine . LeftIdx
2016-08-07 19:49:47 +03:00
addCount = 0
delCount = 0
2016-01-04 00:26:46 +03:00
}
2016-08-07 19:49:47 +03:00
switch lineType {
2016-11-07 19:24:59 +03:00
case DiffLineDel :
2016-01-09 09:51:17 +03:00
if diffLine . RightIdx == 0 && diffLine . LeftIdx == idx - difference {
2016-08-07 19:49:47 +03:00
matchDiffLine = diffLine
2016-01-08 15:50:25 +03:00
}
2016-11-07 19:24:59 +03:00
case DiffLineAdd :
2016-01-09 09:51:17 +03:00
if diffLine . LeftIdx == 0 && diffLine . RightIdx == idx + difference {
2016-08-07 19:49:47 +03:00
matchDiffLine = diffLine
2016-01-04 00:26:46 +03:00
}
}
}
2016-08-07 19:49:47 +03:00
if addCount == delCount {
return matchDiffLine
}
2016-01-04 00:26:46 +03:00
return nil
}
2016-08-07 19:49:47 +03:00
// diffMatchPatch is the package-wide diffmatchpatch instance used to compute
// inline diffs.
var diffMatchPatch = diffmatchpatch . New ( )
// init sets DiffEditCost to 100 on the shared instance.
func init ( ) {
diffMatchPatch . DiffEditCost = 100
}
2016-11-22 14:08:23 +03:00
// GetComputedInlineDiffFor computes inline diff for the given line.
2016-01-27 23:54:08 +03:00
func ( diffSection * DiffSection ) GetComputedInlineDiffFor ( diffLine * DiffLine ) template . HTML {
2016-08-07 19:49:47 +03:00
if setting . Git . DisableDiffHighlight {
2019-06-27 00:35:07 +03:00
return template . HTML ( getLineContent ( diffLine . Content [ 1 : ] ) )
2016-01-27 23:54:08 +03:00
}
2020-07-01 00:34:03 +03:00
2016-08-07 19:49:47 +03:00
var (
compareDiffLine * DiffLine
diff1 string
diff2 string
)
2016-01-04 00:26:46 +03:00
2016-01-27 23:54:08 +03:00
// try to find equivalent diff line. ignore, otherwise
2016-08-07 19:49:47 +03:00
switch diffLine . Type {
2020-07-01 00:34:03 +03:00
case DiffLineSection :
return template . HTML ( getLineContent ( diffLine . Content [ 1 : ] ) )
2016-11-07 19:24:59 +03:00
case DiffLineAdd :
compareDiffLine = diffSection . GetLine ( DiffLineDel , diffLine . RightIdx )
2016-01-27 23:54:08 +03:00
if compareDiffLine == nil {
2020-07-09 00:02:38 +03:00
return template . HTML ( highlight . Code ( diffSection . FileName , diffLine . Content [ 1 : ] ) )
2016-01-27 23:54:08 +03:00
}
diff1 = compareDiffLine . Content
diff2 = diffLine . Content
2016-11-07 19:24:59 +03:00
case DiffLineDel :
compareDiffLine = diffSection . GetLine ( DiffLineAdd , diffLine . LeftIdx )
2016-01-27 23:54:08 +03:00
if compareDiffLine == nil {
2020-07-09 00:02:38 +03:00
return template . HTML ( highlight . Code ( diffSection . FileName , diffLine . Content [ 1 : ] ) )
2016-01-04 00:26:46 +03:00
}
2016-01-27 23:54:08 +03:00
diff1 = diffLine . Content
diff2 = compareDiffLine . Content
2016-08-07 19:49:47 +03:00
default :
2019-06-24 23:23:52 +03:00
if strings . IndexByte ( " +-" , diffLine . Content [ 0 ] ) > - 1 {
2020-07-09 00:02:38 +03:00
return template . HTML ( highlight . Code ( diffSection . FileName , diffLine . Content [ 1 : ] ) )
2019-06-24 23:23:52 +03:00
}
2020-07-01 00:34:03 +03:00
return template . HTML ( highlight . Code ( diffSection . FileName , diffLine . Content ) )
2016-01-27 23:54:08 +03:00
}
2016-01-04 00:26:46 +03:00
2020-07-11 08:43:12 +03:00
diffRecord := diffMatchPatch . DiffMain ( highlight . Code ( diffSection . FileName , diff1 [ 1 : ] ) , highlight . Code ( diffSection . FileName , diff2 [ 1 : ] ) , true )
2016-08-07 19:49:47 +03:00
diffRecord = diffMatchPatch . DiffCleanupEfficiency ( diffRecord )
2020-10-31 20:24:32 +03:00
diffRecord = diffMatchPatch . DiffCleanupEfficiency ( diffRecord )
2020-07-01 00:34:03 +03:00
return diffToHTML ( diffSection . FileName , diffRecord , diffLine . Type )
2016-01-04 00:26:46 +03:00
}
2016-11-24 11:30:08 +03:00
// DiffFile represents a file diff.
2014-04-13 05:35:36 +04:00
type DiffFile struct {
Name string
2015-11-03 03:55:24 +03:00
OldName string
2014-05-13 20:40:32 +04:00
Index int
2014-04-13 05:35:36 +04:00
Addition , Deletion int
2016-01-06 23:00:40 +03:00
Type DiffFileType
2015-02-06 12:02:32 +03:00
IsCreated bool
IsDeleted bool
2014-04-16 04:01:20 +04:00
IsBin bool
2016-12-26 04:16:37 +03:00
IsLFSFile bool
2015-11-03 03:55:24 +03:00
IsRenamed bool
2016-07-22 21:18:56 +03:00
IsSubmodule bool
2014-04-13 05:35:36 +04:00
Sections [ ] * DiffSection
2016-06-29 18:11:00 +03:00
IsIncomplete bool
2020-10-13 21:50:57 +03:00
IsProtected bool
2014-04-13 05:35:36 +04:00
}
2016-11-22 14:08:23 +03:00
// GetType returns type of diff file.
2016-01-06 23:00:40 +03:00
func ( diffFile * DiffFile ) GetType ( ) int {
return int ( diffFile . Type )
}
2019-11-15 05:52:59 +03:00
// GetTailSection creates a fake DiffLineSection if the last section is not the end of the file
func ( diffFile * DiffFile ) GetTailSection ( gitRepo * git . Repository , leftCommitID , rightCommitID string ) * DiffSection {
2020-01-22 10:20:58 +03:00
if len ( diffFile . Sections ) == 0 || diffFile . Type != DiffFileChange || diffFile . IsBin || diffFile . IsLFSFile {
2019-11-15 05:52:59 +03:00
return nil
}
leftCommit , err := gitRepo . GetCommit ( leftCommitID )
if err != nil {
return nil
}
rightCommit , err := gitRepo . GetCommit ( rightCommitID )
if err != nil {
return nil
}
lastSection := diffFile . Sections [ len ( diffFile . Sections ) - 1 ]
lastLine := lastSection . Lines [ len ( lastSection . Lines ) - 1 ]
leftLineCount := getCommitFileLineCount ( leftCommit , diffFile . Name )
rightLineCount := getCommitFileLineCount ( rightCommit , diffFile . Name )
if leftLineCount <= lastLine . LeftIdx || rightLineCount <= lastLine . RightIdx {
return nil
}
tailDiffLine := & DiffLine {
Type : DiffLineSection ,
Content : " " ,
SectionInfo : & DiffLineSectionInfo {
Path : diffFile . Name ,
LastLeftIdx : lastLine . LeftIdx ,
LastRightIdx : lastLine . RightIdx ,
LeftIdx : leftLineCount ,
RightIdx : rightLineCount ,
} }
2020-07-01 00:34:03 +03:00
tailSection := & DiffSection { FileName : diffFile . Name , Lines : [ ] * DiffLine { tailDiffLine } }
2019-11-15 05:52:59 +03:00
return tailSection
}
// getCommitFileLineCount returns the line count of the blob at filePath in
// commit, or 0 when the blob cannot be found or its lines cannot be counted.
func getCommitFileLineCount(commit *git.Commit, filePath string) int {
	blob, err := commit.GetBlobByPath(filePath)
	if err != nil {
		return 0
	}
	if count, err := blob.GetBlobLineCount(); err == nil {
		return count
	}
	return 0
}
2016-11-24 11:30:08 +03:00
// Diff represents a difference between two git trees.
2014-04-13 05:35:36 +04:00
type Diff struct {
2020-05-26 08:58:07 +03:00
NumFiles , TotalAddition , TotalDeletion int
Files [ ] * DiffFile
IsIncomplete bool
2014-04-13 05:35:36 +04:00
}
2018-08-06 07:43:22 +03:00
// LoadComments loads comments into each line
2019-09-06 05:20:09 +03:00
func ( diff * Diff ) LoadComments ( issue * models . Issue , currentUser * models . User ) error {
allComments , err := models . FetchCodeComments ( issue , currentUser )
2018-08-06 07:43:22 +03:00
if err != nil {
return err
}
for _ , file := range diff . Files {
if lineCommits , ok := allComments [ file . Name ] ; ok {
for _ , section := range file . Sections {
for _ , line := range section . Lines {
if comments , ok := lineCommits [ int64 ( line . LeftIdx * - 1 ) ] ; ok {
line . Comments = append ( line . Comments , comments ... )
}
if comments , ok := lineCommits [ int64 ( line . RightIdx ) ] ; ok {
line . Comments = append ( line . Comments , comments ... )
}
sort . SliceStable ( line . Comments , func ( i , j int ) bool {
return line . Comments [ i ] . CreatedUnix < line . Comments [ j ] . CreatedUnix
} )
}
}
}
}
return nil
}
2016-11-22 14:08:23 +03:00
// cmdDiffHead is the prefix of the line that starts each file in a patch.
const cmdDiffHead = "diff --git "
2014-04-13 05:35:36 +04:00
2020-10-16 20:13:18 +03:00
// ParsePatch builds a Diff object from a io.Reader and some parameters.
2017-01-05 03:50:34 +03:00
func ParsePatch ( maxLines , maxLineCharacters , maxFiles int , reader io . Reader ) ( * Diff , error ) {
2020-10-16 20:13:18 +03:00
var curFile * DiffFile
diff := & Diff { Files : make ( [ ] * DiffFile , 0 ) }
sb := strings . Builder { }
// OK let's set a reasonable buffer size.
// This should be let's say at least the size of maxLineCharacters or 4096 whichever is larger.
readerSize := maxLineCharacters
if readerSize < 4096 {
readerSize = 4096
}
input := bufio . NewReaderSize ( reader , readerSize )
line , err := input . ReadString ( '\n' )
if err != nil {
if err == io . EOF {
return diff , nil
}
return diff , err
}
parsingLoop :
for {
// 1. A patch file always begins with `diff --git ` + `a/path b/path` (possibly quoted)
// if it does not we have bad input!
if ! strings . HasPrefix ( line , cmdDiffHead ) {
return diff , fmt . Errorf ( "Invalid first file line: %s" , line )
2014-04-13 05:35:36 +04:00
}
2020-10-16 20:13:18 +03:00
// TODO: Handle skipping first n files
if len ( diff . Files ) >= maxFiles {
diff . IsIncomplete = true
_ , err := io . Copy ( ioutil . Discard , reader )
if err != nil {
// By the definition of io.Copy this never returns io.EOF
return diff , fmt . Errorf ( "Copy: %v" , err )
}
break parsingLoop
}
2014-04-13 05:35:36 +04:00
2020-10-16 20:13:18 +03:00
curFile = createDiffFile ( diff , line )
diff . Files = append ( diff . Files , curFile )
// 2. It is followed by one or more extended header lines:
//
// old mode <mode>
// new mode <mode>
// deleted file mode <mode>
// new file mode <mode>
// copy from <path>
// copy to <path>
// rename from <path>
// rename to <path>
// similarity index <number>
// dissimilarity index <number>
// index <hash>..<hash> <mode>
//
// * <mode> 6-digit octal numbers including the file type and file permission bits.
// * <path> does not include the a/ and b/ prefixes
// * <number> percentage of unchanged lines for similarity, percentage of changed
// lines dissimilarity as integer rounded down with terminal %. 100% => equal files.
// * The index line includes the blob object names before and after the change.
// The <mode> is included if the file mode does not change; otherwise, separate
// lines indicate the old and the new mode.
// 3. Following this header the "standard unified" diff format header may be encountered: (but not for every case...)
//
// --- a/<path>
// +++ b/<path>
//
// With multiple hunks
//
// @@ <hunk descriptor> @@
// +added line
// -removed line
// unchanged line
//
// 4. Binary files get:
//
// Binary files a/<path> and b/<path> differ
//
// but one of a/<path> and b/<path> could be /dev/null.
curFileLoop :
2017-11-29 02:22:24 +03:00
for {
2020-10-16 20:13:18 +03:00
line , err = input . ReadString ( '\n' )
2017-11-29 02:22:24 +03:00
if err != nil {
2020-10-16 20:13:18 +03:00
if err != io . EOF {
return diff , err
2017-11-29 02:22:24 +03:00
}
2020-10-16 20:13:18 +03:00
break parsingLoop
2017-11-29 02:22:24 +03:00
}
2020-10-16 20:13:18 +03:00
switch {
2020-10-22 02:08:44 +03:00
case strings . HasPrefix ( line , cmdDiffHead ) :
break curFileLoop
2020-10-16 20:13:18 +03:00
case strings . HasPrefix ( line , "old mode " ) ||
strings . HasPrefix ( line , "new mode " ) :
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
case strings . HasPrefix ( line , "copy from " ) :
curFile . IsRenamed = true
curFile . Type = DiffFileCopy
case strings . HasPrefix ( line , "copy to " ) :
curFile . IsRenamed = true
curFile . Type = DiffFileCopy
case strings . HasPrefix ( line , "new file" ) :
curFile . Type = DiffFileAdd
curFile . IsCreated = true
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
case strings . HasPrefix ( line , "deleted" ) :
curFile . Type = DiffFileDel
curFile . IsDeleted = true
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
case strings . HasPrefix ( line , "index" ) :
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
case strings . HasPrefix ( line , "similarity index 100%" ) :
curFile . Type = DiffFileRename
case strings . HasPrefix ( line , "Binary" ) :
curFile . IsBin = true
case strings . HasPrefix ( line , "--- " ) :
// Do nothing with this line
case strings . HasPrefix ( line , "+++ " ) :
// Do nothing with this line
lineBytes , isFragment , err := parseHunks ( curFile , maxLines , maxLineCharacters , input )
diff . TotalAddition += curFile . Addition
diff . TotalDeletion += curFile . Deletion
if err != nil {
if err != io . EOF {
return diff , err
}
break parsingLoop
}
sb . Reset ( )
_ , _ = sb . Write ( lineBytes )
for isFragment {
lineBytes , isFragment , err = input . ReadLine ( )
if err != nil {
// Now by the definition of ReadLine this cannot be io.EOF
return diff , fmt . Errorf ( "Unable to ReadLine: %v" , err )
}
_ , _ = sb . Write ( lineBytes )
}
line = sb . String ( )
sb . Reset ( )
break curFileLoop
2015-12-02 09:10:13 +03:00
}
}
2014-04-13 05:35:36 +04:00
2020-10-16 20:13:18 +03:00
}
// FIXME: There are numerous issues with this:
// - we might want to consider detecting encoding while parsing but...
// - we're likely to fail to get the correct encoding here anyway as we won't have enough information
// - and this doesn't really account for changes in encoding
var buf bytes . Buffer
for _ , f := range diff . Files {
buf . Reset ( )
for _ , sec := range f . Sections {
for _ , l := range sec . Lines {
if l . Type == DiffLineSection {
continue
}
buf . WriteString ( l . Content [ 1 : ] )
buf . WriteString ( "\n" )
}
}
charsetLabel , err := charset . DetectEncoding ( buf . Bytes ( ) )
if charsetLabel != "UTF-8" && err == nil {
encoding , _ := stdcharset . Lookup ( charsetLabel )
if encoding != nil {
d := encoding . NewDecoder ( )
for _ , sec := range f . Sections {
for _ , l := range sec . Lines {
if l . Type == DiffLineSection {
continue
}
if c , _ , err := transform . String ( d , l . Content [ 1 : ] ) ; err == nil {
l . Content = l . Content [ 0 : 1 ] + c
}
}
}
}
2014-09-17 08:03:03 +04:00
}
2020-10-16 20:13:18 +03:00
}
2014-09-17 08:03:03 +04:00
2020-10-16 20:13:18 +03:00
diff . NumFiles = len ( diff . Files )
return diff , nil
}
2016-12-26 04:16:37 +03:00
2020-10-16 20:13:18 +03:00
func parseHunks ( curFile * DiffFile , maxLines , maxLineCharacters int , input * bufio . Reader ) ( lineBytes [ ] byte , isFragment bool , err error ) {
sb := strings . Builder { }
2016-12-26 04:16:37 +03:00
2020-10-16 20:13:18 +03:00
var (
curSection * DiffSection
curFileLinesCount int
curFileLFSPrefix bool
)
2016-12-26 04:16:37 +03:00
2020-10-16 20:13:18 +03:00
leftLine , rightLine := 1 , 1
2016-12-26 04:16:37 +03:00
2020-10-16 20:13:18 +03:00
for {
2020-11-22 01:41:24 +03:00
for isFragment {
curFile . IsIncomplete = true
_ , isFragment , err = input . ReadLine ( )
if err != nil {
// Now by the definition of ReadLine this cannot be io.EOF
err = fmt . Errorf ( "Unable to ReadLine: %v" , err )
return
}
}
2020-10-16 20:13:18 +03:00
sb . Reset ( )
lineBytes , isFragment , err = input . ReadLine ( )
if err != nil {
if err == io . EOF {
return
2016-12-26 04:16:37 +03:00
}
2020-10-16 20:13:18 +03:00
err = fmt . Errorf ( "Unable to ReadLine: %v" , err )
return
}
if lineBytes [ 0 ] == 'd' {
// End of hunks
return
2016-12-26 04:16:37 +03:00
}
2020-10-16 20:13:18 +03:00
switch lineBytes [ 0 ] {
case '@' :
if curFileLinesCount >= maxLines {
curFile . IsIncomplete = true
continue
}
2014-04-13 05:35:36 +04:00
2020-10-16 20:13:18 +03:00
_ , _ = sb . Write ( lineBytes )
for isFragment {
// This is very odd indeed - we're in a section header and the line is too long
// This really shouldn't happen...
lineBytes , isFragment , err = input . ReadLine ( )
if err != nil {
// Now by the definition of ReadLine this cannot be io.EOF
err = fmt . Errorf ( "Unable to ReadLine: %v" , err )
return
}
_ , _ = sb . Write ( lineBytes )
}
line := sb . String ( )
// Create a new section to represent this hunk
2014-04-13 05:35:36 +04:00
curSection = & DiffSection { }
curFile . Sections = append ( curFile . Sections , curSection )
2020-10-16 20:13:18 +03:00
2020-01-23 20:28:15 +03:00
lineSectionInfo := getDiffLineSectionInfo ( curFile . Name , line , leftLine - 1 , rightLine - 1 )
2019-11-15 05:52:59 +03:00
diffLine := & DiffLine {
Type : DiffLineSection ,
Content : line ,
SectionInfo : lineSectionInfo ,
2015-07-29 17:55:01 +03:00
}
2019-11-15 05:52:59 +03:00
curSection . Lines = append ( curSection . Lines , diffLine )
2020-07-01 00:34:03 +03:00
curSection . FileName = curFile . Name
2019-11-15 05:52:59 +03:00
// update line number.
leftLine = lineSectionInfo . LeftIdx
rightLine = lineSectionInfo . RightIdx
2014-04-13 05:35:36 +04:00
continue
2020-10-16 20:13:18 +03:00
case '\\' :
if curFileLinesCount >= maxLines {
curFile . IsIncomplete = true
continue
}
// This is used only to indicate that the current file does not have a terminal newline
if ! bytes . Equal ( lineBytes , [ ] byte ( "\\ No newline at end of file" ) ) {
err = fmt . Errorf ( "Unexpected line in hunk: %s" , string ( lineBytes ) )
return
}
// Technically this should be the end the file!
// FIXME: we should be putting a marker at the end of the file if there is no terminal new line
continue
case '+' :
curFileLinesCount ++
2014-04-13 05:35:36 +04:00
curFile . Addition ++
2020-10-16 20:13:18 +03:00
if curFileLinesCount >= maxLines {
curFile . IsIncomplete = true
continue
}
diffLine := & DiffLine { Type : DiffLineAdd , RightIdx : rightLine }
2014-04-13 05:35:36 +04:00
rightLine ++
curSection . Lines = append ( curSection . Lines , diffLine )
2020-10-16 20:13:18 +03:00
case '-' :
curFileLinesCount ++
2014-04-13 05:35:36 +04:00
curFile . Deletion ++
2020-10-16 20:13:18 +03:00
if curFileLinesCount >= maxLines {
curFile . IsIncomplete = true
continue
}
diffLine := & DiffLine { Type : DiffLineDel , LeftIdx : leftLine }
2014-04-13 05:35:36 +04:00
if leftLine > 0 {
leftLine ++
}
curSection . Lines = append ( curSection . Lines , diffLine )
2020-10-16 20:13:18 +03:00
case ' ' :
curFileLinesCount ++
if curFileLinesCount >= maxLines {
curFile . IsIncomplete = true
continue
}
diffLine := & DiffLine { Type : DiffLinePlain , LeftIdx : leftLine , RightIdx : rightLine }
leftLine ++
rightLine ++
curSection . Lines = append ( curSection . Lines , diffLine )
default :
// This is unexpected
err = fmt . Errorf ( "Unexpected line in hunk: %s" , string ( lineBytes ) )
return
2014-04-13 05:35:36 +04:00
}
2020-10-16 20:13:18 +03:00
line := string ( lineBytes )
if isFragment {
curFile . IsIncomplete = true
for isFragment {
lineBytes , isFragment , err = input . ReadLine ( )
2019-11-28 12:05:57 +03:00
if err != nil {
2020-10-16 20:13:18 +03:00
// Now by the definition of ReadLine this cannot be io.EOF
err = fmt . Errorf ( "Unable to ReadLine: %v" , err )
return
2019-11-28 12:05:57 +03:00
}
}
2020-10-16 20:13:18 +03:00
}
2020-11-22 01:41:24 +03:00
if len ( line ) > maxLineCharacters {
curFile . IsIncomplete = true
line = line [ : maxLineCharacters ]
}
2020-10-16 20:13:18 +03:00
curSection . Lines [ len ( curSection . Lines ) - 1 ] . Content = line
2019-11-28 12:05:57 +03:00
2020-10-16 20:13:18 +03:00
// handle LFS
if line [ 1 : ] == models . LFSMetaFileIdentifier {
curFileLFSPrefix = true
} else if curFileLFSPrefix && strings . HasPrefix ( line [ 1 : ] , models . LFSMetaFileOidPrefix ) {
oid := strings . TrimPrefix ( line [ 1 : ] , models . LFSMetaFileOidPrefix )
if len ( oid ) == 64 {
m := & models . LFSMetaObject { Oid : oid }
count , err := models . Count ( m )
2015-12-02 09:10:13 +03:00
2020-10-16 20:13:18 +03:00
if err == nil && count > 0 {
curFile . IsBin = true
curFile . IsLFSFile = true
curSection . Lines = nil
2014-04-13 05:35:36 +04:00
}
}
}
}
2020-10-16 20:13:18 +03:00
}
2014-04-13 05:35:36 +04:00
2020-10-16 20:13:18 +03:00
// createDiffFile creates the DiffFile for a `diff --git` header line. line
// must start with cmdDiffHead.
//
// The a/ and b/ file names are the same unless a rename/copy is involved;
// even for a creation or deletion /dev/null is not used in their place, and
// path names are quoted if necessary. The two names are therefore read in
// order with readFileName, and the file is flagged as renamed when they
// differ. The new file starts out as a DiffFileChange with the next 1-based
// index in diff.
func createDiffFile(diff *Diff, line string) *DiffFile {
	names := strings.NewReader(line[len(cmdDiffHead):] + " ")
	oldName := readFileName(names)
	newName := readFileName(names)

	return &DiffFile{
		Name:      newName,
		OldName:   oldName,
		Index:     len(diff.Files) + 1,
		Type:      DiffFileChange,
		IsRenamed: newName != oldName,
		Sections:  make([]*DiffSection, 0, 10),
	}
}
// readFileName consumes one file name (and the spaces following it) from rd
// and returns it with its two-byte prefix (such as "a/" or "b/") removed.
//
// A name that starts with a double quote is read as a Go-style quoted string
// and unquoted; a single leading backslash left after unquoting is dropped
// before the prefix is removed. Any other name is read up to the next space.
//
// If no name can be read, or the name is too short to carry a prefix, the
// empty string is returned instead of panicking on malformed input.
func readFileName(rd *strings.Reader) string {
	var name string

	// Peek at the first byte to decide between quoted and plain scanning.
	char, _ := rd.ReadByte()
	_ = rd.UnreadByte()

	if char == '"' {
		// A scan failure leaves name empty or partial; the length guard
		// below turns that into an empty result.
		_, _ = fmt.Fscanf(rd, "%q ", &name)
		name = strings.TrimPrefix(name, "\\")
	} else {
		_, _ = fmt.Fscanf(rd, "%s ", &name)
	}

	// Strip the "a/" / "b/" style prefix, guarding against short names.
	if len(name) < 2 {
		return ""
	}
	return name[2:]
}
2016-11-24 11:30:08 +03:00
// GetDiffRange builds a Diff between two commits of a repository.
// passing the empty string as beforeCommitID returns a diff from the
// parent commit.
2017-01-05 03:50:34 +03:00
func GetDiffRange ( repoPath , beforeCommitID , afterCommitID string , maxLines , maxLineCharacters , maxFiles int ) ( * Diff , error ) {
2018-08-14 20:49:33 +03:00
return GetDiffRangeWithWhitespaceBehavior ( repoPath , beforeCommitID , afterCommitID , maxLines , maxLineCharacters , maxFiles , "" )
}
// GetDiffRangeWithWhitespaceBehavior builds a Diff between two commits of a repository.
// Passing the empty string as beforeCommitID returns a diff from the parent commit.
// The whitespaceBehavior is either an empty string or a git flag
func GetDiffRangeWithWhitespaceBehavior ( repoPath , beforeCommitID , afterCommitID string , maxLines , maxLineCharacters , maxFiles int , whitespaceBehavior string ) ( * Diff , error ) {
2016-07-30 18:39:58 +03:00
gitRepo , err := git . OpenRepository ( repoPath )
2014-04-13 05:35:36 +04:00
if err != nil {
return nil , err
}
2019-11-13 10:01:19 +03:00
defer gitRepo . Close ( )
2014-04-13 05:35:36 +04:00
2016-07-30 18:39:58 +03:00
commit , err := gitRepo . GetCommit ( afterCommitID )
2014-04-13 05:35:36 +04:00
if err != nil {
return nil , err
}
2019-11-30 17:40:22 +03:00
// FIXME: graceful: These commands should likely have a timeout
ctx , cancel := context . WithCancel ( git . DefaultContext )
defer cancel ( )
2014-05-29 06:15:15 +04:00
var cmd * exec . Cmd
2020-05-30 00:14:00 +03:00
if ( len ( beforeCommitID ) == 0 || beforeCommitID == git . EmptySHA ) && commit . ParentCount ( ) == 0 {
2020-10-22 02:08:44 +03:00
diffArgs := [ ] string { "diff" , "--src-prefix=\\a/" , "--dst-prefix=\\b/" , "-M" }
if len ( whitespaceBehavior ) != 0 {
diffArgs = append ( diffArgs , whitespaceBehavior )
}
// append empty tree ref
diffArgs = append ( diffArgs , "4b825dc642cb6eb9a060e54bf8d69288fbee4904" )
diffArgs = append ( diffArgs , afterCommitID )
cmd = exec . CommandContext ( ctx , git . GitExecutable , diffArgs ... )
2014-05-29 06:15:15 +04:00
} else {
2018-08-14 20:49:33 +03:00
actualBeforeCommitID := beforeCommitID
if len ( actualBeforeCommitID ) == 0 {
parentCommit , _ := commit . Parent ( 0 )
actualBeforeCommitID = parentCommit . ID . String ( )
}
2020-10-14 07:49:33 +03:00
diffArgs := [ ] string { "diff" , "--src-prefix=\\a/" , "--dst-prefix=\\b/" , "-M" }
2018-08-14 20:49:33 +03:00
if len ( whitespaceBehavior ) != 0 {
diffArgs = append ( diffArgs , whitespaceBehavior )
}
diffArgs = append ( diffArgs , actualBeforeCommitID )
diffArgs = append ( diffArgs , afterCommitID )
2019-11-30 17:40:22 +03:00
cmd = exec . CommandContext ( ctx , git . GitExecutable , diffArgs ... )
2019-11-15 05:52:59 +03:00
beforeCommitID = actualBeforeCommitID
2014-04-13 05:35:36 +04:00
}
2014-05-29 06:15:15 +04:00
cmd . Dir = repoPath
cmd . Stderr = os . Stderr
2014-07-07 01:32:36 +04:00
2015-12-02 09:10:13 +03:00
stdout , err := cmd . StdoutPipe ( )
if err != nil {
return nil , fmt . Errorf ( "StdoutPipe: %v" , err )
}
if err = cmd . Start ( ) ; err != nil {
return nil , fmt . Errorf ( "Start: %v" , err )
}
2014-07-07 01:32:36 +04:00
2019-11-30 17:40:22 +03:00
pid := process . GetManager ( ) . Add ( fmt . Sprintf ( "GetDiffRange [repo_path: %s]" , repoPath ) , cancel )
2017-01-17 08:58:58 +03:00
defer process . GetManager ( ) . Remove ( pid )
2015-12-02 09:10:13 +03:00
2017-01-05 03:50:34 +03:00
diff , err := ParsePatch ( maxLines , maxLineCharacters , maxFiles , stdout )
2015-12-02 09:10:13 +03:00
if err != nil {
return nil , fmt . Errorf ( "ParsePatch: %v" , err )
}
2019-11-15 05:52:59 +03:00
for _ , diffFile := range diff . Files {
tailSection := diffFile . GetTailSection ( gitRepo , beforeCommitID , afterCommitID )
if tailSection != nil {
diffFile . Sections = append ( diffFile . Sections , tailSection )
}
}
2015-12-02 09:10:13 +03:00
if err = cmd . Wait ( ) ; err != nil {
return nil , fmt . Errorf ( "Wait: %v" , err )
}
2020-05-30 00:14:00 +03:00
shortstatArgs := [ ] string { beforeCommitID + "..." + afterCommitID }
if len ( beforeCommitID ) == 0 || beforeCommitID == git . EmptySHA {
shortstatArgs = [ ] string { git . EmptyTreeSHA , afterCommitID }
}
diff . NumFiles , diff . TotalAddition , diff . TotalDeletion , err = git . GetDiffShortStat ( repoPath , shortstatArgs ... )
2020-07-29 20:53:04 +03:00
if err != nil && strings . Contains ( err . Error ( ) , "no merge base" ) {
// git >= 2.28 now returns an error if base and head have become unrelated.
// previously it would return the results of git diff --shortstat base head so let's try that...
shortstatArgs = [ ] string { beforeCommitID , afterCommitID }
diff . NumFiles , diff . TotalAddition , diff . TotalDeletion , err = git . GetDiffShortStat ( repoPath , shortstatArgs ... )
}
2020-05-26 08:58:07 +03:00
if err != nil {
return nil , err
}
2015-12-02 09:10:13 +03:00
return diff , nil
2014-04-13 05:35:36 +04:00
}
2016-07-30 18:02:22 +03:00
2016-11-24 11:30:08 +03:00
// GetDiffCommit builds a Diff representing the given commitID.
2017-01-05 03:50:34 +03:00
func GetDiffCommit ( repoPath , commitID string , maxLines , maxLineCharacters , maxFiles int ) ( * Diff , error ) {
return GetDiffRange ( repoPath , "" , commitID , maxLines , maxLineCharacters , maxFiles )
2014-08-26 16:20:18 +04:00
}
2019-09-06 05:20:09 +03:00
// CommentAsDiff parses c.Patch and returns it as a *Diff, using the
// configured git diff limits (lines, line characters, files).
// An error is returned when parsing fails, when the patch contains no file,
// or when its first file has no sections.
func CommentAsDiff(c *models.Comment) (*Diff, error) {
	patch := strings.NewReader(c.Patch)
	diff, err := ParsePatch(
		setting.Git.MaxGitDiffLines,
		setting.Git.MaxGitDiffLineCharacters,
		setting.Git.MaxGitDiffFiles,
		patch,
	)

	// Cases are evaluated in order, so diff is only inspected once parsing
	// is known to have succeeded.
	switch {
	case err != nil:
		return nil, err
	case len(diff.Files) == 0:
		return nil, fmt.Errorf("no file found for comment ID: %d", c.ID)
	case len(diff.Files[0].Sections) == 0:
		return nil, fmt.Errorf("no sections found for comment ID: %d", c.ID)
	}
	return diff, nil
}
// CommentMustAsDiff calls CommentAsDiff and, instead of returning a failure
// to the caller, logs it as a warning. Whatever diff CommentAsDiff produced
// is returned either way.
func CommentMustAsDiff(c *models.Comment) *Diff {
	diff, err := CommentAsDiff(c)
	if err == nil {
		return diff
	}
	log.Warn("CommentMustAsDiff: %v", err)
	return diff
}