2020-02-11 12:34:17 +03:00
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package git
import (
"bytes"
"io"
"io/ioutil"
2020-02-20 22:53:55 +03:00
"code.gitea.io/gitea/modules/analyze"
2020-02-11 12:34:17 +03:00
2020-04-15 20:40:39 +03:00
"github.com/go-enry/go-enry/v2"
2020-03-17 19:19:58 +03:00
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"
2020-02-11 12:34:17 +03:00
)
// fileSizeLimit is the maximum number of bytes (16 MiB) of a file's content
// that is read for language detection.
const fileSizeLimit int64 = 16 << 20
2020-05-31 01:58:55 +03:00
// specialLanguages lists the languages that are left out of the language
// statistics unless nothing else is present in the repository. Only add
// languages here that are not normally regarded as code (markup, data and
// configuration formats, plain text).
var specialLanguages = []string{
	"XML", "JSON", "TOML", "YAML",
	"INI", "SVG", "Text", "Markdown",
}
2020-02-11 12:34:17 +03:00
// GetLanguageStats calculates language stats for git repository at specified commit
2020-05-30 10:46:15 +03:00
func ( repo * Repository ) GetLanguageStats ( commitID string ) ( map [ string ] int64 , error ) {
2020-02-11 12:34:17 +03:00
r , err := git . PlainOpen ( repo . Path )
if err != nil {
return nil , err
}
rev , err := r . ResolveRevision ( plumbing . Revision ( commitID ) )
if err != nil {
return nil , err
}
commit , err := r . CommitObject ( * rev )
if err != nil {
return nil , err
}
tree , err := commit . Tree ( )
if err != nil {
return nil , err
}
sizes := make ( map [ string ] int64 )
err = tree . Files ( ) . ForEach ( func ( f * object . File ) error {
2020-05-31 01:58:55 +03:00
if f . Size == 0 || enry . IsVendor ( f . Name ) || enry . IsDotFile ( f . Name ) ||
2020-02-11 12:34:17 +03:00
enry . IsDocumentation ( f . Name ) || enry . IsConfiguration ( f . Name ) {
return nil
}
2020-05-29 09:20:01 +03:00
// If content can not be read just do detection by filename
content , _ := readFile ( f , fileSizeLimit )
if enry . IsGenerated ( f . Name , content ) {
return nil
}
2020-02-11 12:34:17 +03:00
// TODO: Use .gitattributes file for linguist overrides
2020-05-29 09:20:01 +03:00
language := analyze . GetCodeLanguage ( f . Name , content )
2020-02-20 22:53:55 +03:00
if language == enry . OtherLanguage || language == "" {
2020-05-31 01:58:55 +03:00
return nil
}
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry . GetLanguageGroup ( language )
if group != "" {
language = group
2020-02-11 12:34:17 +03:00
}
2020-02-20 22:53:55 +03:00
sizes [ language ] += f . Size
2020-02-11 12:34:17 +03:00
return nil
} )
if err != nil {
return nil , err
}
2020-05-31 01:58:55 +03:00
// filter special languages unless they are the only language
if len ( sizes ) > 1 {
for _ , language := range specialLanguages {
delete ( sizes , language )
}
2020-02-11 12:34:17 +03:00
}
2020-05-30 10:46:15 +03:00
return sizes , nil
2020-02-11 12:34:17 +03:00
}
// readFile returns the content of f, reading at most limit bytes.
// A limit that is zero or negative disables the cap and the whole file is read.
// If opening the reader fails, nil and the error are returned; if copying
// fails, the bytes read so far are returned together with the error.
func readFile(f *object.File, limit int64) ([]byte, error) {
	reader, err := f.Reader()
	if err != nil {
		return nil, err
	}
	defer reader.Close()

	// No cap requested: read everything.
	if limit <= 0 {
		return ioutil.ReadAll(reader)
	}

	// Pre-size the buffer to the smaller of the file size and the cap so the
	// copy below does not have to grow it repeatedly.
	capacity := limit
	if f.Size < limit {
		capacity = f.Size
	}

	var out bytes.Buffer
	out.Grow(int(capacity))
	_, err = io.Copy(&out, io.LimitReader(reader, limit))
	return out.Bytes(), err
}