improve performance of diffs (#32393)

This has two major changes that significantly reduce the amount of work
done for large diffs:

* Kill a running git process when reaching the maximum number of files
in a diff, preventing it from processing the entire diff.
* When loading a diff with the URL param `file-only=true`, skip loading
stats. This speeds up loading both hidden files of a diff and sections
of a diff when clicking the "Show More" button.

A couple of minor things from profiling are also included:

* Reuse existing repo in `PrepareViewPullInfo` if head and base are the
same.

The performance impact is going to depend heavily on the individual diff
and the hardware it runs on, but when testing locally on a diff changing
100k+ lines over hundreds of files, I'm seeing a roughly 75% reduction
in time to load the result of "Show More"

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
(cherry picked from commit 7dcccc3bb19655a6f83dd495ffc332708d0c8678)
This commit is contained in:
Rowan Bohde 2024-11-01 22:29:37 -05:00 committed by Gergely Nagy
parent 748ae10e7c
commit befafe9a05
No known key found for this signature in database
4 changed files with 34 additions and 37 deletions

View File

@ -338,6 +338,7 @@ func Diff(ctx *context.Context) {
MaxLineCharacters: setting.Git.MaxGitDiffLineCharacters, MaxLineCharacters: setting.Git.MaxGitDiffLineCharacters,
MaxFiles: maxFiles, MaxFiles: maxFiles,
WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)), WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)),
FileOnly: fileOnly,
}, files...) }, files...)
if err != nil { if err != nil {
ctx.NotFound("GetDiff", err) ctx.NotFound("GetDiff", err)

View File

@ -611,6 +611,8 @@ func PrepareCompareDiff(
maxLines, maxFiles = -1, -1 maxLines, maxFiles = -1, -1
} }
fileOnly := ctx.FormBool("file-only")
diff, err := gitdiff.GetDiff(ctx, ci.HeadGitRepo, diff, err := gitdiff.GetDiff(ctx, ci.HeadGitRepo,
&gitdiff.DiffOptions{ &gitdiff.DiffOptions{
BeforeCommitID: beforeCommitID, BeforeCommitID: beforeCommitID,
@ -621,6 +623,7 @@ func PrepareCompareDiff(
MaxFiles: maxFiles, MaxFiles: maxFiles,
WhitespaceBehavior: whitespaceBehavior, WhitespaceBehavior: whitespaceBehavior,
DirectComparison: ci.DirectComparison, DirectComparison: ci.DirectComparison,
FileOnly: fileOnly,
}, ctx.FormStrings("files")...) }, ctx.FormStrings("files")...)
if err != nil { if err != nil {
ctx.ServerError("GetDiffRangeWithWhitespaceBehavior", err) ctx.ServerError("GetDiffRangeWithWhitespaceBehavior", err)

View File

@ -614,12 +614,12 @@ func PrepareViewPullInfo(ctx *context.Context, issue *issues_model.Issue) *git.C
var headBranchSha string var headBranchSha string
// HeadRepo may be missing // HeadRepo may be missing
if pull.HeadRepo != nil { if pull.HeadRepo != nil {
headGitRepo, err := gitrepo.OpenRepository(ctx, pull.HeadRepo) headGitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, pull.HeadRepo)
if err != nil { if err != nil {
ctx.ServerError("OpenRepository", err) ctx.ServerError("RepositoryFromContextOrOpen", err)
return nil return nil
} }
defer headGitRepo.Close() defer closer.Close()
if pull.Flow == issues_model.PullRequestFlowGithub { if pull.Flow == issues_model.PullRequestFlowGithub {
headBranchExist = headGitRepo.IsBranchExist(pull.HeadBranch) headBranchExist = headGitRepo.IsBranchExist(pull.HeadBranch)
@ -966,6 +966,7 @@ func viewPullFiles(ctx *context.Context, specifiedStartCommit, specifiedEndCommi
MaxLineCharacters: setting.Git.MaxGitDiffLineCharacters, MaxLineCharacters: setting.Git.MaxGitDiffLineCharacters,
MaxFiles: maxFiles, MaxFiles: maxFiles,
WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)), WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)),
FileOnly: fileOnly,
} }
if !willShowSpecifiedCommit { if !willShowSpecifiedCommit {

View File

@ -379,18 +379,11 @@ func (diffFile *DiffFile) GetType() int {
} }
// GetTailSection creates a fake DiffLineSection if the last section is not the end of the file // GetTailSection creates a fake DiffLineSection if the last section is not the end of the file
func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection { func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommit, rightCommit *git.Commit) *DiffSection {
if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile { if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile {
return nil return nil
} }
leftCommit, err := gitRepo.GetCommit(leftCommitID)
if err != nil {
return nil
}
rightCommit, err := gitRepo.GetCommit(rightCommitID)
if err != nil {
return nil
}
lastSection := diffFile.Sections[len(diffFile.Sections)-1] lastSection := diffFile.Sections[len(diffFile.Sections)-1]
lastLine := lastSection.Lines[len(lastSection.Lines)-1] lastLine := lastSection.Lines[len(lastSection.Lines)-1]
leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name) leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name)
@ -532,11 +525,6 @@ parsingLoop:
lastFile := createDiffFile(diff, line) lastFile := createDiffFile(diff, line)
diff.End = lastFile.Name diff.End = lastFile.Name
diff.IsIncomplete = true diff.IsIncomplete = true
_, err := io.Copy(io.Discard, reader)
if err != nil {
// By the definition of io.Copy this never returns io.EOF
return diff, fmt.Errorf("error during io.Copy: %w", err)
}
break parsingLoop break parsingLoop
} }
@ -1097,6 +1085,7 @@ type DiffOptions struct {
MaxFiles int MaxFiles int
WhitespaceBehavior git.TrustedCmdArgs WhitespaceBehavior git.TrustedCmdArgs
DirectComparison bool DirectComparison bool
FileOnly bool
} }
// GetDiff builds a Diff between two commits of a repository. // GetDiff builds a Diff between two commits of a repository.
@ -1105,12 +1094,16 @@ type DiffOptions struct {
func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
repoPath := gitRepo.Path repoPath := gitRepo.Path
var beforeCommit *git.Commit
commit, err := gitRepo.GetCommit(opts.AfterCommitID) commit, err := gitRepo.GetCommit(opts.AfterCommitID)
if err != nil { if err != nil {
return nil, err return nil, err
} }
cmdDiff := git.NewCommand(gitRepo.Ctx) cmdCtx, cmdCancel := context.WithCancel(ctx)
defer cmdCancel()
cmdDiff := git.NewCommand(cmdCtx)
objectFormat, err := gitRepo.GetObjectFormat() objectFormat, err := gitRepo.GetObjectFormat()
if err != nil { if err != nil {
return nil, err return nil, err
@ -1132,6 +1125,12 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
AddArguments(opts.WhitespaceBehavior...). AddArguments(opts.WhitespaceBehavior...).
AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID) AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID)
opts.BeforeCommitID = actualBeforeCommitID opts.BeforeCommitID = actualBeforeCommitID
var err error
beforeCommit, err = gitRepo.GetCommit(opts.BeforeCommitID)
if err != nil {
return nil, err
}
} }
// In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file // In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
@ -1166,7 +1165,9 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
_ = writer.Close() _ = writer.Close()
}() }()
diff, err := ParsePatch(ctx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile) diff, err := ParsePatch(cmdCtx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
// Ensure the git process is killed if it didn't exit already
cmdCancel()
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to ParsePatch: %w", err) return nil, fmt.Errorf("unable to ParsePatch: %w", err)
} }
@ -1207,37 +1208,28 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name) diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name)
} }
tailSection := diffFile.GetTailSection(gitRepo, opts.BeforeCommitID, opts.AfterCommitID) tailSection := diffFile.GetTailSection(gitRepo, beforeCommit, commit)
if tailSection != nil { if tailSection != nil {
diffFile.Sections = append(diffFile.Sections, tailSection) diffFile.Sections = append(diffFile.Sections, tailSection)
} }
} }
separator := "..." if opts.FileOnly {
if opts.DirectComparison { return diff, nil
separator = ".."
} }
diffPaths := []string{opts.BeforeCommitID + separator + opts.AfterCommitID} stats, err := GetPullDiffStats(gitRepo, opts)
if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String() {
diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID}
}
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
if err != nil && strings.Contains(err.Error(), "no merge base") {
// git >= 2.28 now returns an error if base and head have become unrelated.
// previously it would return the results of git diff --shortstat base head so let's try that...
diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
}
if err != nil { if err != nil {
return nil, err return nil, err
} }
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion = stats.NumFiles, stats.TotalAddition, stats.TotalDeletion
return diff, nil return diff, nil
} }
type PullDiffStats struct { type PullDiffStats struct {
TotalAddition, TotalDeletion int NumFiles, TotalAddition, TotalDeletion int
} }
// GetPullDiffStats // GetPullDiffStats
@ -1261,12 +1253,12 @@ func GetPullDiffStats(gitRepo *git.Repository, opts *DiffOptions) (*PullDiffStat
diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID} diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID}
} }
_, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...) diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
if err != nil && strings.Contains(err.Error(), "no merge base") { if err != nil && strings.Contains(err.Error(), "no merge base") {
// git >= 2.28 now returns an error if base and head have become unrelated. // git >= 2.28 now returns an error if base and head have become unrelated.
// previously it would return the results of git diff --shortstat base head so let's try that... // previously it would return the results of git diff --shortstat base head so let's try that...
diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID} diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
_, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...) diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
} }
if err != nil { if err != nil {
return nil, err return nil, err