// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
2016-01-31 14:19:02 -02:00
package highlight
2015-12-16 22:13:12 -05:00
import (
2020-06-30 17:34:03 -04:00
"bufio"
"bytes"
2021-06-17 22:55:16 +08:00
"fmt"
2020-09-02 21:19:42 +01:00
gohtml "html"
2022-07-30 21:17:43 +02:00
"io"
2020-06-30 17:34:03 -04:00
"path/filepath"
2015-12-16 22:13:12 -05:00
"strings"
2020-06-30 17:34:03 -04:00
"sync"
2015-12-17 22:31:34 -05:00
2020-11-13 16:13:41 -05:00
"code.gitea.io/gitea/modules/analyze"
2020-06-30 17:34:03 -04:00
"code.gitea.io/gitea/modules/log"
2016-11-10 17:24:48 +01:00
"code.gitea.io/gitea/modules/setting"
2021-11-17 20:34:35 +08:00
2022-09-26 07:50:03 +02:00
"github.com/alecthomas/chroma/v2"
"github.com/alecthomas/chroma/v2/formatters/html"
"github.com/alecthomas/chroma/v2/lexers"
"github.com/alecthomas/chroma/v2/styles"
2021-06-17 22:55:16 +08:00
lru "github.com/hashicorp/golang-lru"
2015-12-16 22:13:12 -05:00
)
2020-06-30 17:34:03 -04:00
// sizeLimit is the largest input, in bytes, that gets syntax highlighted;
// anything bigger skips highlighting for performance reasons.
const sizeLimit = 1 << 20
2020-06-30 17:34:03 -04:00
2015-12-16 22:13:12 -05:00
var (
2020-06-30 17:34:03 -04:00
// For custom user mapping
highlightMapping = map [ string ] string { }
once sync . Once
2021-06-17 22:55:16 +08:00
2021-06-25 02:37:07 +08:00
cache * lru . TwoQueueCache
2020-06-30 17:34:03 -04:00
)
// NewContext loads custom highlight map from local config
func NewContext ( ) {
once . Do ( func ( ) {
2022-07-23 19:28:02 +08:00
if setting . Cfg != nil {
keys := setting . Cfg . Section ( "highlight.mapping" ) . Keys ( )
for i := range keys {
highlightMapping [ keys [ i ] . Name ( ) ] = keys [ i ] . Value ( )
}
2020-06-30 17:34:03 -04:00
}
2021-06-17 22:55:16 +08:00
// The size 512 is simply a conservative rule of thumb
2021-06-25 02:37:07 +08:00
c , err := lru . New2Q ( 512 )
2021-06-17 22:55:16 +08:00
if err != nil {
panic ( fmt . Sprintf ( "failed to initialize LRU cache for highlighter: %s" , err ) )
}
cache = c
2020-06-30 17:34:03 -04:00
} )
}
// Code returns a HTML version of code string with chroma syntax highlighting classes
2021-11-17 20:37:00 +00:00
func Code ( fileName , language , code string ) string {
2020-06-30 17:34:03 -04:00
NewContext ( )
2015-12-16 22:13:12 -05:00
2022-07-30 21:17:43 +02:00
// diff view newline will be passed as empty, change to literal '\n' so it can be copied
2020-07-11 01:43:12 -04:00
// preserve literal newline in blame view
if code == "" || code == "\n" {
2020-07-08 17:02:38 -04:00
return "\n"
}
2020-07-11 01:43:12 -04:00
2020-06-30 17:34:03 -04:00
if len ( code ) > sizeLimit {
return code
}
2021-05-13 11:31:23 +02:00
var lexer chroma . Lexer
2021-11-17 20:37:00 +00:00
if len ( language ) > 0 {
lexer = lexers . Get ( language )
if lexer == nil {
// Attempt stripping off the '?'
if idx := strings . IndexByte ( language , '?' ) ; idx > 0 {
lexer = lexers . Get ( language [ : idx ] )
}
}
}
if lexer == nil {
if val , ok := highlightMapping [ filepath . Ext ( fileName ) ] ; ok {
2022-01-20 18:46:10 +01:00
// use mapped value to find lexer
2021-11-17 20:37:00 +00:00
lexer = lexers . Get ( val )
}
2015-12-16 22:13:12 -05:00
}
2015-12-17 22:31:34 -05:00
2021-06-17 22:55:16 +08:00
if lexer == nil {
if l , ok := cache . Get ( fileName ) ; ok {
lexer = l . ( chroma . Lexer )
}
}
2020-06-30 17:34:03 -04:00
if lexer == nil {
2021-05-13 11:31:23 +02:00
lexer = lexers . Match ( fileName )
if lexer == nil {
lexer = lexers . Fallback
}
2021-06-17 22:55:16 +08:00
cache . Add ( fileName , lexer )
2017-06-09 20:39:16 -04:00
}
2021-09-24 14:29:32 +01:00
return CodeFromLexer ( lexer , code )
}
// CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes
func CodeFromLexer ( lexer chroma . Lexer , code string ) string {
formatter := html . New ( html . WithClasses ( true ) ,
html . WithLineNumbers ( false ) ,
html . PreventSurroundingPre ( true ) ,
)
htmlbuf := bytes . Buffer { }
htmlw := bufio . NewWriter ( & htmlbuf )
2015-12-16 22:13:12 -05:00
2022-06-20 12:02:49 +02:00
iterator , err := lexer . Tokenise ( nil , code )
2020-06-30 17:34:03 -04:00
if err != nil {
log . Error ( "Can't tokenize code: %v" , err )
return code
2015-12-17 22:31:34 -05:00
}
2020-06-30 17:34:03 -04:00
// style not used for live site but need to pass something
err = formatter . Format ( htmlw , styles . GitHub , iterator )
if err != nil {
log . Error ( "Can't format code: %v" , err )
return code
}
2022-07-30 21:17:43 +02:00
_ = htmlw . Flush ( )
2020-07-11 01:43:12 -04:00
// Chroma will add newlines for certain lexers in order to highlight them properly
2022-07-30 21:17:43 +02:00
// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
2020-07-11 01:43:12 -04:00
return strings . TrimSuffix ( htmlbuf . String ( ) , "\n" )
2015-12-17 22:31:34 -05:00
}
2022-07-30 21:17:43 +02:00
// File returns a slice of chroma syntax highlighted HTML lines of code
func File ( fileName , language string , code [ ] byte ) ( [ ] string , error ) {
2020-06-30 17:34:03 -04:00
NewContext ( )
if len ( code ) > sizeLimit {
2022-07-30 21:17:43 +02:00
return PlainText ( code ) , nil
2020-06-30 17:34:03 -04:00
}
2022-07-30 21:17:43 +02:00
2020-06-30 17:34:03 -04:00
formatter := html . New ( html . WithClasses ( true ) ,
html . WithLineNumbers ( false ) ,
html . PreventSurroundingPre ( true ) ,
)
2021-05-13 11:31:23 +02:00
var lexer chroma . Lexer
2021-11-17 20:37:00 +00:00
// provided language overrides everything
2022-07-30 21:17:43 +02:00
if language != "" {
2021-11-17 20:37:00 +00:00
lexer = lexers . Get ( language )
}
if lexer == nil {
if val , ok := highlightMapping [ filepath . Ext ( fileName ) ] ; ok {
lexer = lexers . Get ( val )
}
2020-06-30 17:34:03 -04:00
}
if lexer == nil {
2022-07-30 21:17:43 +02:00
guessLanguage := analyze . GetCodeLanguage ( fileName , code )
2021-05-13 11:31:23 +02:00
2022-07-30 21:17:43 +02:00
lexer = lexers . Get ( guessLanguage )
2020-06-30 17:34:03 -04:00
if lexer == nil {
2021-05-13 11:31:23 +02:00
lexer = lexers . Match ( fileName )
if lexer == nil {
lexer = lexers . Fallback
}
2020-06-30 17:34:03 -04:00
}
2015-12-16 22:13:12 -05:00
}
2020-06-30 17:34:03 -04:00
iterator , err := lexer . Tokenise ( nil , string ( code ) )
if err != nil {
2022-07-30 21:17:43 +02:00
return nil , fmt . Errorf ( "can't tokenize code: %w" , err )
2015-12-16 22:13:12 -05:00
}
2022-09-26 07:50:03 +02:00
tokensLines := chroma . SplitTokensIntoLines ( iterator . Tokens ( ) )
htmlBuf := & bytes . Buffer { }
2021-08-14 00:16:56 +01:00
2022-09-26 07:50:03 +02:00
lines := make ( [ ] string , 0 , len ( tokensLines ) )
for _ , tokens := range tokensLines {
iterator = chroma . Literator ( tokens ... )
err = formatter . Format ( htmlBuf , styles . GitHub , iterator )
if err != nil {
return nil , fmt . Errorf ( "can't format code: %w" , err )
}
lines = append ( lines , htmlBuf . String ( ) )
htmlBuf . Reset ( )
2015-12-17 22:31:34 -05:00
}
2022-09-26 07:50:03 +02:00
return lines , nil
2020-06-30 17:34:03 -04:00
}
2015-12-17 22:31:34 -05:00
2022-07-30 21:17:43 +02:00
// PlainText returns non-highlighted HTML for code
func PlainText ( code [ ] byte ) [ ] string {
r := bufio . NewReader ( bytes . NewReader ( code ) )
m := make ( [ ] string , 0 , bytes . Count ( code , [ ] byte { '\n' } ) + 1 )
for {
content , err := r . ReadString ( '\n' )
if err != nil && err != io . EOF {
log . Error ( "failed to read string from buffer: %v" , err )
break
}
if content == "" && err == io . EOF {
break
2020-07-08 17:02:38 -04:00
}
2022-07-30 21:17:43 +02:00
s := gohtml . EscapeString ( content )
m = append ( m , s )
2020-06-30 17:34:03 -04:00
}
return m
2015-12-16 22:13:12 -05:00
}