2019-05-04 15:39:03 +03:00
// Copyright 2019 The Gitea Authors. All rights reserved.
2022-11-27 21:20:29 +03:00
// SPDX-License-Identifier: MIT
2019-05-04 15:39:03 +03:00
package git
import (
"bufio"
2020-09-05 23:12:14 +03:00
"context"
2019-05-04 15:39:03 +03:00
"fmt"
2020-09-05 23:12:14 +03:00
"os"
2020-01-20 13:07:30 +03:00
"sort"
2019-05-04 15:39:03 +03:00
"strconv"
"strings"
"time"
2022-10-12 08:18:26 +03:00
"code.gitea.io/gitea/modules/container"
2019-05-04 15:39:03 +03:00
)
// CodeActivityStats represents git statistics data
type CodeActivityStats struct {
AuthorCount int64
CommitCount int64
ChangedFiles int64
Additions int64
Deletions int64
CommitCountInAllBranches int64
2020-01-20 13:07:30 +03:00
Authors [ ] * CodeActivityAuthor
}
// CodeActivityAuthor represents git statistics data for commit authors.
type CodeActivityAuthor struct {
	// Name is the author name as printed by git (`%aN`).
	Name string
	// Email is the author e-mail (`%aE`); GetCodeActivityStats stores it lower-cased.
	Email string
	// Commits is the number of commits attributed to this author.
	Commits int64
}
2021-07-08 14:38:13 +03:00
// GetCodeActivityStats returns code statistics for activity page
2019-05-04 15:39:03 +03:00
func ( repo * Repository ) GetCodeActivityStats ( fromTime time . Time , branch string ) ( * CodeActivityStats , error ) {
stats := & CodeActivityStats { }
since := fromTime . Format ( time . RFC3339 )
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 05:30:43 +03:00
stdout , _ , runErr := NewCommand ( repo . Ctx , "rev-list" , "--count" , "--no-merges" , "--branches=*" , "--date=iso" ) . AddOptionFormat ( "--since='%s'" , since ) . RunStdString ( & RunOpts { Dir : repo . Path } )
2022-04-01 05:55:30 +03:00
if runErr != nil {
return nil , runErr
2019-05-04 15:39:03 +03:00
}
2022-04-01 05:55:30 +03:00
c , err := strconv . ParseInt ( strings . TrimSpace ( stdout ) , 10 , 64 )
2019-05-04 15:39:03 +03:00
if err != nil {
return nil , err
}
stats . CommitCountInAllBranches = c
2020-09-05 23:12:14 +03:00
stdoutReader , stdoutWriter , err := os . Pipe ( )
if err != nil {
return nil , err
}
defer func ( ) {
_ = stdoutReader . Close ( )
_ = stdoutWriter . Close ( )
} ( )
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 05:30:43 +03:00
gitCmd := NewCommand ( repo . Ctx , "log" , "--numstat" , "--no-merges" , "--pretty=format:---%n%h%n%aN%n%aE%n" , "--date=iso" ) . AddOptionFormat ( "--since='%s'" , since )
2019-05-04 15:39:03 +03:00
if len ( branch ) == 0 {
2022-10-15 13:49:26 +03:00
gitCmd . AddArguments ( "--branches=*" )
2019-05-04 15:39:03 +03:00
} else {
2022-10-15 13:49:26 +03:00
gitCmd . AddArguments ( "--first-parent" ) . AddDynamicArguments ( branch )
2019-05-04 15:39:03 +03:00
}
2020-09-05 23:12:14 +03:00
stderr := new ( strings . Builder )
2022-10-15 13:49:26 +03:00
err = gitCmd . Run ( & RunOpts {
2022-04-01 05:55:30 +03:00
Env : [ ] string { } ,
Dir : repo . Path ,
Stdout : stdoutWriter ,
Stderr : stderr ,
2022-02-11 15:47:22 +03:00
PipelineFunc : func ( ctx context . Context , cancel context . CancelFunc ) error {
2020-09-05 23:12:14 +03:00
_ = stdoutWriter . Close ( )
scanner := bufio . NewScanner ( stdoutReader )
scanner . Split ( bufio . ScanLines )
stats . CommitCount = 0
stats . Additions = 0
stats . Deletions = 0
authors := make ( map [ string ] * CodeActivityAuthor )
2022-10-12 08:18:26 +03:00
files := make ( container . Set [ string ] )
2020-09-05 23:12:14 +03:00
var author string
p := 0
for scanner . Scan ( ) {
l := strings . TrimSpace ( scanner . Text ( ) )
if l == "---" {
p = 1
} else if p == 0 {
continue
} else {
p ++
2020-01-20 13:07:30 +03:00
}
2020-09-05 23:12:14 +03:00
if p > 4 && len ( l ) == 0 {
continue
2019-05-04 15:39:03 +03:00
}
2020-09-05 23:12:14 +03:00
switch p {
case 1 : // Separator
case 2 : // Commit sha-1
stats . CommitCount ++
case 3 : // Author
author = l
case 4 : // E-mail
email := strings . ToLower ( l )
if _ , ok := authors [ email ] ; ! ok {
2022-02-11 15:47:22 +03:00
authors [ email ] = & CodeActivityAuthor { Name : author , Email : email , Commits : 0 }
2020-09-05 23:12:14 +03:00
}
authors [ email ] . Commits ++
default : // Changed file
if parts := strings . Fields ( l ) ; len ( parts ) >= 3 {
if parts [ 0 ] != "-" {
if c , err := strconv . ParseInt ( strings . TrimSpace ( parts [ 0 ] ) , 10 , 64 ) ; err == nil {
stats . Additions += c
}
}
if parts [ 1 ] != "-" {
if c , err := strconv . ParseInt ( strings . TrimSpace ( parts [ 1 ] ) , 10 , 64 ) ; err == nil {
stats . Deletions += c
}
}
2022-10-12 08:18:26 +03:00
files . Add ( parts [ 2 ] )
2019-05-04 15:39:03 +03:00
}
}
}
2024-03-22 14:17:30 +03:00
if err = scanner . Err ( ) ; err != nil {
_ = stdoutReader . Close ( )
return fmt . Errorf ( "GetCodeActivityStats scan: %w" , err )
2024-03-19 05:20:36 +03:00
}
2020-09-05 23:12:14 +03:00
a := make ( [ ] * CodeActivityAuthor , 0 , len ( authors ) )
for _ , v := range authors {
a = append ( a , v )
}
// Sort authors descending depending on commit count
sort . Slice ( a , func ( i , j int ) bool {
return a [ i ] . Commits > a [ j ] . Commits
} )
stats . AuthorCount = int64 ( len ( authors ) )
stats . ChangedFiles = int64 ( len ( files ) )
stats . Authors = a
_ = stdoutReader . Close ( )
return nil
2022-02-11 15:47:22 +03:00
} ,
} )
2020-09-05 23:12:14 +03:00
if err != nil {
return nil , fmt . Errorf ( "Failed to get GetCodeActivityStats for repository.\nError: %w\nStderr: %s" , err , stderr )
2020-01-20 13:07:30 +03:00
}
2019-05-04 15:39:03 +03:00
return stats , nil
}