2020-12-17 14:00:47 +00:00
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
2021-08-24 11:47:09 -05:00
//go:build gogit
2020-12-17 14:00:47 +00:00
package git
import (
"bytes"
"io"
2021-11-17 20:37:00 +00:00
"strings"
2020-12-17 14:00:47 +00:00
"code.gitea.io/gitea/modules/analyze"
"github.com/go-enry/go-enry/v2"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"
)
// GetLanguageStats calculates language stats for git repository at specified commit
func ( repo * Repository ) GetLanguageStats ( commitID string ) ( map [ string ] int64 , error ) {
r , err := git . PlainOpen ( repo . Path )
if err != nil {
return nil , err
}
rev , err := r . ResolveRevision ( plumbing . Revision ( commitID ) )
if err != nil {
return nil , err
}
commit , err := r . CommitObject ( * rev )
if err != nil {
return nil , err
}
tree , err := commit . Tree ( )
if err != nil {
return nil , err
}
2022-06-16 23:47:44 +08:00
checker , deferable := repo . CheckAttributeReader ( commitID )
defer deferable ( )
2021-09-09 21:13:36 +01:00
2020-12-17 14:00:47 +00:00
sizes := make ( map [ string ] int64 )
err = tree . Files ( ) . ForEach ( func ( f * object . File ) error {
2021-09-09 21:13:36 +01:00
if f . Size == 0 {
return nil
}
notVendored := false
notGenerated := false
if checker != nil {
attrs , err := checker . CheckPath ( f . Name )
if err == nil {
if vendored , has := attrs [ "linguist-vendored" ] ; has {
if vendored == "set" || vendored == "true" {
return nil
}
notVendored = vendored == "false"
}
if generated , has := attrs [ "linguist-generated" ] ; has {
if generated == "set" || generated == "true" {
return nil
}
notGenerated = generated == "false"
}
if language , has := attrs [ "linguist-language" ] ; has && language != "unspecified" && language != "" {
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry . GetLanguageGroup ( language )
2021-09-21 03:46:51 +08:00
if len ( group ) != 0 {
2021-09-09 21:13:36 +01:00
language = group
}
sizes [ language ] += f . Size
return nil
2021-11-17 20:37:00 +00:00
} else if language , has := attrs [ "gitlab-language" ] ; has && language != "unspecified" && language != "" {
// strip off a ? if present
if idx := strings . IndexByte ( language , '?' ) ; idx >= 0 {
language = language [ : idx ]
}
if len ( language ) != 0 {
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry . GetLanguageGroup ( language )
if len ( group ) != 0 {
language = group
}
sizes [ language ] += f . Size
return nil
}
2021-09-09 21:13:36 +01:00
}
}
}
if ( ! notVendored && analyze . IsVendor ( f . Name ) ) || enry . IsDotFile ( f . Name ) ||
2020-12-17 14:00:47 +00:00
enry . IsDocumentation ( f . Name ) || enry . IsConfiguration ( f . Name ) {
return nil
}
// If content can not be read or file is too big just do detection by filename
var content [ ] byte
if f . Size <= bigFileSize {
content , _ = readFile ( f , fileSizeLimit )
}
2021-09-09 21:13:36 +01:00
if ! notGenerated && enry . IsGenerated ( f . Name , content ) {
2020-12-17 14:00:47 +00:00
return nil
}
// TODO: Use .gitattributes file for linguist overrides
language := analyze . GetCodeLanguage ( f . Name , content )
if language == enry . OtherLanguage || language == "" {
return nil
}
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry . GetLanguageGroup ( language )
if group != "" {
language = group
}
sizes [ language ] += f . Size
return nil
} )
if err != nil {
return nil , err
}
// filter special languages unless they are the only language
if len ( sizes ) > 1 {
for language := range sizes {
langtype := enry . GetLanguageType ( language )
if langtype != enry . Programming && langtype != enry . Markup {
delete ( sizes , language )
}
}
}
return sizes , nil
}
func readFile ( f * object . File , limit int64 ) ( [ ] byte , error ) {
r , err := f . Reader ( )
if err != nil {
return nil , err
}
defer r . Close ( )
if limit <= 0 {
2021-09-22 13:38:34 +08:00
return io . ReadAll ( r )
2020-12-17 14:00:47 +00:00
}
size := f . Size
if limit > 0 && size > limit {
size = limit
}
buf := bytes . NewBuffer ( nil )
buf . Grow ( int ( size ) )
_ , err = io . Copy ( buf , io . LimitReader ( r , limit ) )
return buf . Bytes ( ) , err
}