2020-12-17 14:00:47 +00:00
// Copyright 2020 The Gitea Authors. All rights reserved.
2022-11-27 13:20:29 -05:00
// SPDX-License-Identifier: MIT
2020-12-17 14:00:47 +00:00
2021-08-24 11:47:09 -05:00
//go:build gogit
2020-12-17 14:00:47 +00:00
package git
import (
"bytes"
"io"
"code.gitea.io/gitea/modules/analyze"
2024-02-23 18:24:27 +01:00
"code.gitea.io/gitea/modules/optional"
2020-12-17 14:00:47 +00:00
"github.com/go-enry/go-enry/v2"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"
)
// GetLanguageStats calculates language stats for git repository at specified commit
func ( repo * Repository ) GetLanguageStats ( commitID string ) ( map [ string ] int64 , error ) {
r , err := git . PlainOpen ( repo . Path )
if err != nil {
return nil , err
}
rev , err := r . ResolveRevision ( plumbing . Revision ( commitID ) )
if err != nil {
return nil , err
}
commit , err := r . CommitObject ( * rev )
if err != nil {
return nil , err
}
tree , err := commit . Tree ( )
if err != nil {
return nil , err
}
2022-06-16 23:47:44 +08:00
checker , deferable := repo . CheckAttributeReader ( commitID )
defer deferable ( )
2021-09-09 21:13:36 +01:00
2022-10-29 09:04:21 +02:00
// sizes contains the current calculated size of all files by language
2020-12-17 14:00:47 +00:00
sizes := make ( map [ string ] int64 )
2022-10-29 09:04:21 +02:00
// by default we will only count the sizes of programming languages or markup languages
// unless they are explicitly set using linguist-language
includedLanguage := map [ string ] bool { }
// or if there's only one language in the repository
firstExcludedLanguage := ""
firstExcludedLanguageSize := int64 ( 0 )
2020-12-17 14:00:47 +00:00
err = tree . Files ( ) . ForEach ( func ( f * object . File ) error {
2021-09-09 21:13:36 +01:00
if f . Size == 0 {
return nil
}
2024-02-23 18:24:27 +01:00
isVendored := optional . None [ bool ] ( )
isGenerated := optional . None [ bool ] ( )
isDocumentation := optional . None [ bool ] ( )
isDetectable := optional . None [ bool ] ( )
2021-09-09 21:13:36 +01:00
if checker != nil {
attrs , err := checker . CheckPath ( f . Name )
if err == nil {
2024-02-24 19:46:49 +01:00
isVendored = AttributeToBool ( attrs , AttributeLinguistVendored )
2024-02-23 18:24:27 +01:00
if isVendored . ValueOrDefault ( false ) {
return nil
}
2024-02-24 19:46:49 +01:00
isGenerated = AttributeToBool ( attrs , AttributeLinguistGenerated )
2024-02-23 18:24:27 +01:00
if isGenerated . ValueOrDefault ( false ) {
return nil
2021-09-09 21:13:36 +01:00
}
2024-02-23 18:24:27 +01:00
2024-02-24 19:46:49 +01:00
isDocumentation = AttributeToBool ( attrs , AttributeLinguistDocumentation )
2024-02-23 18:24:27 +01:00
if isDocumentation . ValueOrDefault ( false ) {
return nil
}
2024-02-24 19:46:49 +01:00
isDetectable = AttributeToBool ( attrs , AttributeLinguistDetectable )
2024-02-23 18:24:27 +01:00
if ! isDetectable . ValueOrDefault ( true ) {
return nil
}
2024-02-24 19:46:49 +01:00
hasLanguage := TryReadLanguageAttribute ( attrs )
2024-02-23 18:24:27 +01:00
if hasLanguage . Value ( ) != "" {
language := hasLanguage . Value ( )
2021-09-09 21:13:36 +01:00
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry . GetLanguageGroup ( language )
2021-09-21 03:46:51 +08:00
if len ( group ) != 0 {
2021-09-09 21:13:36 +01:00
language = group
}
2022-10-29 09:04:21 +02:00
// this language will always be added to the size
2021-09-09 21:13:36 +01:00
sizes [ language ] += f . Size
return nil
}
}
}
2024-02-23 18:24:27 +01:00
if ( ! isVendored . Has ( ) && analyze . IsVendor ( f . Name ) ) ||
enry . IsDotFile ( f . Name ) ||
( ! isDocumentation . Has ( ) && enry . IsDocumentation ( f . Name ) ) ||
enry . IsConfiguration ( f . Name ) {
2020-12-17 14:00:47 +00:00
return nil
}
// If content can not be read or file is too big just do detection by filename
var content [ ] byte
if f . Size <= bigFileSize {
content , _ = readFile ( f , fileSizeLimit )
}
2024-02-23 18:24:27 +01:00
if ! isGenerated . Has ( ) && enry . IsGenerated ( f . Name , content ) {
2020-12-17 14:00:47 +00:00
return nil
}
language := analyze . GetCodeLanguage ( f . Name , content )
if language == enry . OtherLanguage || language == "" {
return nil
}
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry . GetLanguageGroup ( language )
if group != "" {
language = group
}
2022-10-29 09:04:21 +02:00
included , checked := includedLanguage [ language ]
if ! checked {
langtype := enry . GetLanguageType ( language )
included = langtype == enry . Programming || langtype == enry . Markup
includedLanguage [ language ] = included
}
2024-02-23 18:24:27 +01:00
if included || isDetectable . ValueOrDefault ( false ) {
2022-10-29 09:04:21 +02:00
sizes [ language ] += f . Size
} else if len ( sizes ) == 0 && ( firstExcludedLanguage == "" || firstExcludedLanguage == language ) {
firstExcludedLanguage = language
firstExcludedLanguageSize += f . Size
}
2020-12-17 14:00:47 +00:00
return nil
} )
if err != nil {
return nil , err
}
2022-10-29 09:04:21 +02:00
// If there are no included languages add the first excluded language
if len ( sizes ) == 0 && firstExcludedLanguage != "" {
sizes [ firstExcludedLanguage ] = firstExcludedLanguageSize
2020-12-17 14:00:47 +00:00
}
2023-05-25 03:37:36 +08:00
return mergeLanguageStats ( sizes ) , nil
2020-12-17 14:00:47 +00:00
}
func readFile ( f * object . File , limit int64 ) ( [ ] byte , error ) {
r , err := f . Reader ( )
if err != nil {
return nil , err
}
defer r . Close ( )
if limit <= 0 {
2021-09-22 13:38:34 +08:00
return io . ReadAll ( r )
2020-12-17 14:00:47 +00:00
}
size := f . Size
if limit > 0 && size > limit {
size = limit
}
buf := bytes . NewBuffer ( nil )
buf . Grow ( int ( size ) )
_ , err = io . Copy ( buf , io . LimitReader ( r , limit ) )
return buf . Bytes ( ) , err
}