2024-02-16 01:21:13 +03:00
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package repository
import (
"bufio"
"context"
"errors"
"fmt"
"os"
"strconv"
"strings"
"sync"
"time"
"code.gitea.io/gitea/models/avatars"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/graceful"
2024-04-11 13:50:04 +03:00
"code.gitea.io/gitea/modules/json"
2024-02-16 01:21:13 +03:00
"code.gitea.io/gitea/modules/log"
api "code.gitea.io/gitea/modules/structs"
"gitea.com/go-chi/cache"
)
const (
contributorStatsCacheKey = "GetContributorStats/%s/%s"
contributorStatsCacheTimeout int64 = 60 * 10
)
var (
ErrAwaitGeneration = errors . New ( "generation took longer than " )
awaitGenerationTime = time . Second * 5
generateLock = sync . Map { }
)
type WeekData struct {
Week int64 ` json:"week" ` // Starting day of the week as Unix timestamp
Additions int ` json:"additions" ` // Number of additions in that week
Deletions int ` json:"deletions" ` // Number of deletions in that week
Commits int ` json:"commits" ` // Number of commits in that week
}
// ContributorData represents statistical git commit count data
type ContributorData struct {
Name string ` json:"name" ` // Display name of the contributor
Login string ` json:"login" ` // Login name of the contributor in case it exists
AvatarLink string ` json:"avatar_link" `
HomeLink string ` json:"home_link" `
TotalCommits int64 ` json:"total_commits" `
Weeks map [ int64 ] * WeekData ` json:"weeks" `
}
// ExtendedCommitStats contains information for commit stats with author data
type ExtendedCommitStats struct {
Author * api . CommitUser ` json:"author" `
Stats * api . CommitStats ` json:"stats" `
}
const layout = time . DateOnly
func findLastSundayBeforeDate ( dateStr string ) ( string , error ) {
date , err := time . Parse ( layout , dateStr )
if err != nil {
return "" , err
}
weekday := date . Weekday ( )
daysToSubtract := int ( weekday ) - int ( time . Sunday )
if daysToSubtract < 0 {
daysToSubtract += 7
}
lastSunday := date . AddDate ( 0 , 0 , - daysToSubtract )
return lastSunday . Format ( layout ) , nil
}
// GetContributorStats returns contributors stats for git commits for given revision or default branch
func GetContributorStats ( ctx context . Context , cache cache . Cache , repo * repo_model . Repository , revision string ) ( map [ string ] * ContributorData , error ) {
// as GetContributorStats is resource intensive we cache the result
cacheKey := fmt . Sprintf ( contributorStatsCacheKey , repo . FullName ( ) , revision )
if ! cache . IsExist ( cacheKey ) {
genReady := make ( chan struct { } )
// dont start multible async generations
_ , run := generateLock . Load ( cacheKey )
if run {
return nil , ErrAwaitGeneration
}
generateLock . Store ( cacheKey , struct { } { } )
// run generation async
go generateContributorStats ( genReady , cache , cacheKey , repo , revision )
select {
case <- time . After ( awaitGenerationTime ) :
return nil , ErrAwaitGeneration
case <- genReady :
// we got generation ready before timeout
break
}
}
// TODO: renew timeout of cache cache.UpdateTimeout(cacheKey, contributorStatsCacheTimeout)
switch v := cache . Get ( cacheKey ) . ( type ) {
case error :
return nil , v
2024-04-11 13:50:04 +03:00
case string :
var cachedStats map [ string ] * ContributorData
return cachedStats , json . Unmarshal ( [ ] byte ( v ) , & cachedStats )
2024-02-16 01:21:13 +03:00
default :
return nil , fmt . Errorf ( "unexpected type in cache detected" )
}
}
// getExtendedCommitStats return the list of *ExtendedCommitStats for the given revision
func getExtendedCommitStats ( repo * git . Repository , revision string /*, limit int */ ) ( [ ] * ExtendedCommitStats , error ) {
baseCommit , err := repo . GetCommit ( revision )
if err != nil {
return nil , err
}
stdoutReader , stdoutWriter , err := os . Pipe ( )
if err != nil {
return nil , err
}
defer func ( ) {
_ = stdoutReader . Close ( )
_ = stdoutWriter . Close ( )
} ( )
gitCmd := git . NewCommand ( repo . Ctx , "log" , "--shortstat" , "--no-merges" , "--pretty=format:---%n%aN%n%aE%n%as" , "--reverse" )
// AddOptionFormat("--max-count=%d", limit)
gitCmd . AddDynamicArguments ( baseCommit . ID . String ( ) )
var extendedCommitStats [ ] * ExtendedCommitStats
stderr := new ( strings . Builder )
err = gitCmd . Run ( & git . RunOpts {
Dir : repo . Path ,
Stdout : stdoutWriter ,
Stderr : stderr ,
PipelineFunc : func ( ctx context . Context , cancel context . CancelFunc ) error {
_ = stdoutWriter . Close ( )
scanner := bufio . NewScanner ( stdoutReader )
for scanner . Scan ( ) {
line := strings . TrimSpace ( scanner . Text ( ) )
if line != "---" {
continue
}
scanner . Scan ( )
authorName := strings . TrimSpace ( scanner . Text ( ) )
scanner . Scan ( )
authorEmail := strings . TrimSpace ( scanner . Text ( ) )
scanner . Scan ( )
date := strings . TrimSpace ( scanner . Text ( ) )
scanner . Scan ( )
stats := strings . TrimSpace ( scanner . Text ( ) )
if authorName == "" || authorEmail == "" || date == "" || stats == "" {
// FIXME: find a better way to parse the output so that we will handle this properly
log . Warn ( "Something is wrong with git log output, skipping..." )
log . Warn ( "authorName: %s, authorEmail: %s, date: %s, stats: %s" , authorName , authorEmail , date , stats )
continue
}
// 1 file changed, 1 insertion(+), 1 deletion(-)
fields := strings . Split ( stats , "," )
commitStats := api . CommitStats { }
for _ , field := range fields [ 1 : ] {
parts := strings . Split ( strings . TrimSpace ( field ) , " " )
value , contributionType := parts [ 0 ] , parts [ 1 ]
amount , _ := strconv . Atoi ( value )
if strings . HasPrefix ( contributionType , "insertion" ) {
commitStats . Additions = amount
} else {
commitStats . Deletions = amount
}
}
commitStats . Total = commitStats . Additions + commitStats . Deletions
scanner . Text ( ) // empty line at the end
res := & ExtendedCommitStats {
Author : & api . CommitUser {
Identity : api . Identity {
Name : authorName ,
Email : authorEmail ,
} ,
Date : date ,
} ,
Stats : & commitStats ,
}
extendedCommitStats = append ( extendedCommitStats , res )
}
_ = stdoutReader . Close ( )
return nil
} ,
} )
if err != nil {
return nil , fmt . Errorf ( "Failed to get ContributorsCommitStats for repository.\nError: %w\nStderr: %s" , err , stderr )
}
return extendedCommitStats , nil
}
func generateContributorStats ( genDone chan struct { } , cache cache . Cache , cacheKey string , repo * repo_model . Repository , revision string ) {
ctx := graceful . GetManager ( ) . HammerContext ( )
gitRepo , closer , err := gitrepo . RepositoryFromContextOrOpen ( ctx , repo )
if err != nil {
err := fmt . Errorf ( "OpenRepository: %w" , err )
_ = cache . Put ( cacheKey , err , contributorStatsCacheTimeout )
return
}
defer closer . Close ( )
if len ( revision ) == 0 {
revision = repo . DefaultBranch
}
extendedCommitStats , err := getExtendedCommitStats ( gitRepo , revision )
if err != nil {
err := fmt . Errorf ( "ExtendedCommitStats: %w" , err )
_ = cache . Put ( cacheKey , err , contributorStatsCacheTimeout )
return
}
if len ( extendedCommitStats ) == 0 {
err := fmt . Errorf ( "no commit stats returned for revision '%s'" , revision )
_ = cache . Put ( cacheKey , err , contributorStatsCacheTimeout )
return
}
layout := time . DateOnly
unknownUserAvatarLink := user_model . NewGhostUser ( ) . AvatarLinkWithSize ( ctx , 0 )
contributorsCommitStats := make ( map [ string ] * ContributorData )
contributorsCommitStats [ "total" ] = & ContributorData {
Name : "Total" ,
Weeks : make ( map [ int64 ] * WeekData ) ,
}
total := contributorsCommitStats [ "total" ]
for _ , v := range extendedCommitStats {
userEmail := v . Author . Email
if len ( userEmail ) == 0 {
continue
}
u , _ := user_model . GetUserByEmail ( ctx , userEmail )
if u != nil {
// update userEmail with user's primary email address so
// that different mail addresses will linked to same account
userEmail = u . GetEmail ( )
}
// duplicated logic
if _ , ok := contributorsCommitStats [ userEmail ] ; ! ok {
if u == nil {
avatarLink := avatars . GenerateEmailAvatarFastLink ( ctx , userEmail , 0 )
if avatarLink == "" {
avatarLink = unknownUserAvatarLink
}
contributorsCommitStats [ userEmail ] = & ContributorData {
Name : v . Author . Name ,
AvatarLink : avatarLink ,
Weeks : make ( map [ int64 ] * WeekData ) ,
}
} else {
contributorsCommitStats [ userEmail ] = & ContributorData {
Name : u . DisplayName ( ) ,
Login : u . LowerName ,
AvatarLink : u . AvatarLinkWithSize ( ctx , 0 ) ,
HomeLink : u . HomeLink ( ) ,
Weeks : make ( map [ int64 ] * WeekData ) ,
}
}
}
// Update user statistics
user := contributorsCommitStats [ userEmail ]
startingOfWeek , _ := findLastSundayBeforeDate ( v . Author . Date )
val , _ := time . Parse ( layout , startingOfWeek )
week := val . UnixMilli ( )
if user . Weeks [ week ] == nil {
user . Weeks [ week ] = & WeekData {
Additions : 0 ,
Deletions : 0 ,
Commits : 0 ,
Week : week ,
}
}
if total . Weeks [ week ] == nil {
total . Weeks [ week ] = & WeekData {
Additions : 0 ,
Deletions : 0 ,
Commits : 0 ,
Week : week ,
}
}
user . Weeks [ week ] . Additions += v . Stats . Additions
user . Weeks [ week ] . Deletions += v . Stats . Deletions
user . Weeks [ week ] . Commits ++
user . TotalCommits ++
// Update overall statistics
total . Weeks [ week ] . Additions += v . Stats . Additions
total . Weeks [ week ] . Deletions += v . Stats . Deletions
total . Weeks [ week ] . Commits ++
total . TotalCommits ++
}
2024-04-11 13:50:04 +03:00
data , err := json . Marshal ( contributorsCommitStats )
if err != nil {
err := fmt . Errorf ( "couldn't marshal the data: %w" , err )
_ = cache . Put ( cacheKey , err , contributorStatsCacheTimeout )
return
}
// Store the data as an string, to make it uniform what data type is returned
// from caches.
_ = cache . Put ( cacheKey , string ( data ) , contributorStatsCacheTimeout )
2024-02-16 01:21:13 +03:00
generateLock . Delete ( cacheKey )
if genDone != nil {
genDone <- struct { } { }
}
}