2015-12-17 06:13:12 +03:00
// Copyright 2015 The Gogs Authors. All rights reserved.
2020-07-01 00:34:03 +03:00
// Copyright 2020 The Gitea Authors. All rights reserved.
2022-11-27 21:20:29 +03:00
// SPDX-License-Identifier: MIT
2015-12-17 06:13:12 +03:00
2016-01-31 19:19:02 +03:00
package highlight
2015-12-17 06:13:12 +03:00
import (
2020-07-01 00:34:03 +03:00
"bufio"
"bytes"
2021-06-17 17:55:16 +03:00
"fmt"
2020-09-02 23:19:42 +03:00
gohtml "html"
2022-07-30 22:17:43 +03:00
"io"
2020-07-01 00:34:03 +03:00
"path/filepath"
2015-12-17 06:13:12 +03:00
"strings"
2020-07-01 00:34:03 +03:00
"sync"
2015-12-18 06:31:34 +03:00
2020-11-14 00:13:41 +03:00
"code.gitea.io/gitea/modules/analyze"
2020-07-01 00:34:03 +03:00
"code.gitea.io/gitea/modules/log"
2016-11-10 19:24:48 +03:00
"code.gitea.io/gitea/modules/setting"
2022-11-19 14:08:06 +03:00
"code.gitea.io/gitea/modules/util"
2021-11-17 15:34:35 +03:00
2022-09-26 08:50:03 +03:00
"github.com/alecthomas/chroma/v2"
"github.com/alecthomas/chroma/v2/formatters/html"
"github.com/alecthomas/chroma/v2/lexers"
"github.com/alecthomas/chroma/v2/styles"
2021-06-17 17:55:16 +03:00
lru "github.com/hashicorp/golang-lru"
2015-12-17 06:13:12 +03:00
)
2020-07-01 00:34:03 +03:00
// sizeLimit is the largest input, in bytes, that Code and File will pass to the
// highlighter; anything bigger is returned without syntax highlighting for
// performance reasons.
const sizeLimit = 1 << 20
2020-07-01 00:34:03 +03:00
2015-12-17 06:13:12 +03:00
var (
2020-07-01 00:34:03 +03:00
// For custom user mapping
highlightMapping = map [ string ] string { }
once sync . Once
2021-06-17 17:55:16 +03:00
2021-06-24 21:37:07 +03:00
cache * lru . TwoQueueCache
2023-03-14 23:09:01 +03:00
githubStyles = styles . Get ( "github" )
2020-07-01 00:34:03 +03:00
)
// NewContext loads custom highlight map from local config
func NewContext ( ) {
once . Do ( func ( ) {
2023-02-19 19:12:01 +03:00
highlightMapping = setting . GetHighlightMapping ( )
2021-06-17 17:55:16 +03:00
// The size 512 is simply a conservative rule of thumb
2021-06-24 21:37:07 +03:00
c , err := lru . New2Q ( 512 )
2021-06-17 17:55:16 +03:00
if err != nil {
panic ( fmt . Sprintf ( "failed to initialize LRU cache for highlighter: %s" , err ) )
}
cache = c
2020-07-01 00:34:03 +03:00
} )
}
2022-11-19 14:08:06 +03:00
// Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name
func Code ( fileName , language , code string ) ( string , string ) {
2020-07-01 00:34:03 +03:00
NewContext ( )
2015-12-17 06:13:12 +03:00
2022-07-30 22:17:43 +03:00
// diff view newline will be passed as empty, change to literal '\n' so it can be copied
2020-07-11 08:43:12 +03:00
// preserve literal newline in blame view
if code == "" || code == "\n" {
2022-11-19 14:08:06 +03:00
return "\n" , ""
2020-07-09 00:02:38 +03:00
}
2020-07-11 08:43:12 +03:00
2020-07-01 00:34:03 +03:00
if len ( code ) > sizeLimit {
2022-11-19 14:08:06 +03:00
return code , ""
2020-07-01 00:34:03 +03:00
}
2021-05-13 12:31:23 +03:00
var lexer chroma . Lexer
2021-11-17 23:37:00 +03:00
if len ( language ) > 0 {
lexer = lexers . Get ( language )
if lexer == nil {
// Attempt stripping off the '?'
if idx := strings . IndexByte ( language , '?' ) ; idx > 0 {
lexer = lexers . Get ( language [ : idx ] )
}
}
}
if lexer == nil {
if val , ok := highlightMapping [ filepath . Ext ( fileName ) ] ; ok {
2022-01-20 20:46:10 +03:00
// use mapped value to find lexer
2021-11-17 23:37:00 +03:00
lexer = lexers . Get ( val )
}
2015-12-17 06:13:12 +03:00
}
2015-12-18 06:31:34 +03:00
2021-06-17 17:55:16 +03:00
if lexer == nil {
if l , ok := cache . Get ( fileName ) ; ok {
lexer = l . ( chroma . Lexer )
}
}
2020-07-01 00:34:03 +03:00
if lexer == nil {
2021-05-13 12:31:23 +03:00
lexer = lexers . Match ( fileName )
if lexer == nil {
lexer = lexers . Fallback
}
2021-06-17 17:55:16 +03:00
cache . Add ( fileName , lexer )
2017-06-10 03:39:16 +03:00
}
2022-11-19 14:08:06 +03:00
lexerName := formatLexerName ( lexer . Config ( ) . Name )
return CodeFromLexer ( lexer , code ) , lexerName
2021-09-24 16:29:32 +03:00
}
// CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes
func CodeFromLexer ( lexer chroma . Lexer , code string ) string {
formatter := html . New ( html . WithClasses ( true ) ,
html . WithLineNumbers ( false ) ,
html . PreventSurroundingPre ( true ) ,
)
htmlbuf := bytes . Buffer { }
htmlw := bufio . NewWriter ( & htmlbuf )
2015-12-17 06:13:12 +03:00
2022-06-20 13:02:49 +03:00
iterator , err := lexer . Tokenise ( nil , code )
2020-07-01 00:34:03 +03:00
if err != nil {
log . Error ( "Can't tokenize code: %v" , err )
return code
2015-12-18 06:31:34 +03:00
}
2020-07-01 00:34:03 +03:00
// style not used for live site but need to pass something
2023-03-14 23:09:01 +03:00
err = formatter . Format ( htmlw , githubStyles , iterator )
2020-07-01 00:34:03 +03:00
if err != nil {
log . Error ( "Can't format code: %v" , err )
return code
}
2022-07-30 22:17:43 +03:00
_ = htmlw . Flush ( )
2020-07-11 08:43:12 +03:00
// Chroma will add newlines for certain lexers in order to highlight them properly
2022-07-30 22:17:43 +03:00
// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
2020-07-11 08:43:12 +03:00
return strings . TrimSuffix ( htmlbuf . String ( ) , "\n" )
2015-12-18 06:31:34 +03:00
}
2022-11-19 14:08:06 +03:00
// File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
func File ( fileName , language string , code [ ] byte ) ( [ ] string , string , error ) {
2020-07-01 00:34:03 +03:00
NewContext ( )
if len ( code ) > sizeLimit {
2022-11-19 14:08:06 +03:00
return PlainText ( code ) , "" , nil
2020-07-01 00:34:03 +03:00
}
2022-07-30 22:17:43 +03:00
2020-07-01 00:34:03 +03:00
formatter := html . New ( html . WithClasses ( true ) ,
html . WithLineNumbers ( false ) ,
html . PreventSurroundingPre ( true ) ,
)
2021-05-13 12:31:23 +03:00
var lexer chroma . Lexer
2021-11-17 23:37:00 +03:00
// provided language overrides everything
2022-07-30 22:17:43 +03:00
if language != "" {
2021-11-17 23:37:00 +03:00
lexer = lexers . Get ( language )
}
if lexer == nil {
if val , ok := highlightMapping [ filepath . Ext ( fileName ) ] ; ok {
lexer = lexers . Get ( val )
}
2020-07-01 00:34:03 +03:00
}
if lexer == nil {
2022-07-30 22:17:43 +03:00
guessLanguage := analyze . GetCodeLanguage ( fileName , code )
2021-05-13 12:31:23 +03:00
2022-07-30 22:17:43 +03:00
lexer = lexers . Get ( guessLanguage )
2020-07-01 00:34:03 +03:00
if lexer == nil {
2021-05-13 12:31:23 +03:00
lexer = lexers . Match ( fileName )
if lexer == nil {
lexer = lexers . Fallback
}
2020-07-01 00:34:03 +03:00
}
2015-12-17 06:13:12 +03:00
}
2022-11-19 14:08:06 +03:00
lexerName := formatLexerName ( lexer . Config ( ) . Name )
2020-07-01 00:34:03 +03:00
iterator , err := lexer . Tokenise ( nil , string ( code ) )
if err != nil {
2022-11-19 14:08:06 +03:00
return nil , "" , fmt . Errorf ( "can't tokenize code: %w" , err )
2015-12-17 06:13:12 +03:00
}
2022-09-26 08:50:03 +03:00
tokensLines := chroma . SplitTokensIntoLines ( iterator . Tokens ( ) )
htmlBuf := & bytes . Buffer { }
2021-08-14 02:16:56 +03:00
2022-09-26 08:50:03 +03:00
lines := make ( [ ] string , 0 , len ( tokensLines ) )
for _ , tokens := range tokensLines {
iterator = chroma . Literator ( tokens ... )
2023-03-14 23:09:01 +03:00
err = formatter . Format ( htmlBuf , githubStyles , iterator )
2022-09-26 08:50:03 +03:00
if err != nil {
2022-11-19 14:08:06 +03:00
return nil , "" , fmt . Errorf ( "can't format code: %w" , err )
2022-09-26 08:50:03 +03:00
}
lines = append ( lines , htmlBuf . String ( ) )
htmlBuf . Reset ( )
2015-12-18 06:31:34 +03:00
}
2022-09-26 08:50:03 +03:00
2022-11-19 14:08:06 +03:00
return lines , lexerName , nil
2020-07-01 00:34:03 +03:00
}
2015-12-18 06:31:34 +03:00
2022-07-30 22:17:43 +03:00
// PlainText returns non-highlighted HTML for code
func PlainText ( code [ ] byte ) [ ] string {
r := bufio . NewReader ( bytes . NewReader ( code ) )
m := make ( [ ] string , 0 , bytes . Count ( code , [ ] byte { '\n' } ) + 1 )
for {
content , err := r . ReadString ( '\n' )
if err != nil && err != io . EOF {
log . Error ( "failed to read string from buffer: %v" , err )
break
}
if content == "" && err == io . EOF {
break
2020-07-09 00:02:38 +03:00
}
2022-07-30 22:17:43 +03:00
s := gohtml . EscapeString ( content )
m = append ( m , s )
2020-07-01 00:34:03 +03:00
}
return m
2015-12-17 06:13:12 +03:00
}
2022-11-19 14:08:06 +03:00
// formatLexerName turns a lexer's configured name into the label returned to
// callers: the name "fallback" becomes "Plaintext", and any other name is
// passed through util.ToTitleCaseNoLower.
func formatLexerName(name string) string {
	switch name {
	case "fallback":
		return "Plaintext"
	default:
		return util.ToTitleCaseNoLower(name)
	}
}