2014-04-10 22:20:58 +04:00
// Copyright 2014 The Gogs Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
2016-02-21 01:10:05 +03:00
package markdown
2014-04-10 22:20:58 +04:00
import (
"bytes"
"fmt"
2015-03-09 07:14:50 +03:00
"io"
2014-04-10 22:20:58 +04:00
"path"
"path/filepath"
"regexp"
"strings"
2015-11-20 13:37:51 +03:00
"github.com/Unknwon/com"
2016-02-21 01:10:05 +03:00
"github.com/microcosm-cc/bluemonday"
2014-10-05 01:15:22 +04:00
"github.com/russross/blackfriday"
2015-03-24 01:32:24 +03:00
"golang.org/x/net/html"
2014-10-05 01:15:22 +04:00
2016-02-21 01:10:05 +03:00
"github.com/gogits/gogs/modules/base"
2014-09-14 21:35:22 +04:00
"github.com/gogits/gogs/modules/setting"
2014-04-10 22:20:58 +04:00
)
2016-04-23 01:28:08 +03:00
// Naming styles for issue references. RenderIssueIndexPattern switches to the
// alphanumeric pattern when metas["style"] equals ISSUE_NAME_STYLE_ALPHANUMERIC
// and uses the numeric pattern otherwise.
const (
	// ISSUE_NAME_STYLE_NUMERIC denotes numeric issue references, e.g. #1287.
	ISSUE_NAME_STYLE_NUMERIC = "numeric"
	// ISSUE_NAME_STYLE_ALPHANUMERIC denotes alphanumeric issue references, e.g. ABC-1234.
	ISSUE_NAME_STYLE_ALPHANUMERIC = "alphanumeric"
)
2016-02-21 01:10:05 +03:00
// Sanitizer is the package-wide HTML sanitization policy. It starts from
// bluemonday's UGCPolicy, is extended by BuildSanitizer, and is applied to the
// final output in Render.
var Sanitizer = bluemonday . UGCPolicy ( )
2016-01-31 23:38:20 +03:00
2016-02-21 01:10:05 +03:00
// BuildSanitizer initializes sanitizer with allowed attributes based on settings.
// This function should only be called once during entire application lifecycle.
func BuildSanitizer ( ) {
// Normal markdown-stuff
Sanitizer . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` [\p { L}\p { N}\s\-_',:\[\]!\./\\\(\)&]* ` ) ) . OnElements ( "code" )
// Checkboxes
Sanitizer . AllowAttrs ( "type" ) . Matching ( regexp . MustCompile ( ` ^checkbox$ ` ) ) . OnElements ( "input" )
Sanitizer . AllowAttrs ( "checked" , "disabled" ) . OnElements ( "input" )
2014-04-10 22:20:58 +04:00
2016-02-21 01:10:05 +03:00
// Custom URL-Schemes
Sanitizer . AllowURLSchemes ( setting . Markdown . CustomURLSchemes ... )
2014-04-10 22:20:58 +04:00
}
2016-01-14 09:20:03 +03:00
// validLinksPattern matches the start of an absolute URL: a lowercase letter,
// one or more word characters or dashes, then "://".
var validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)

// isLink reports whether link starts with a URL scheme followed by "://",
// i.e. whether it is an absolute link rather than a relative path or anchor.
func isLink(link []byte) bool {
	return validLinksPattern.Match(link)
}
2016-02-21 01:10:05 +03:00
// IsMarkdownFile reports whether name looks like a Markdown file
// based on its extension.
2014-04-10 22:20:58 +04:00
func IsMarkdownFile ( name string ) bool {
2016-08-11 15:48:08 +03:00
extension := strings . ToLower ( filepath . Ext ( name ) )
for _ , ext := range setting . Markdown . MdFileExtensions {
if strings . ToLower ( ext ) == extension {
return true
}
2014-04-10 22:20:58 +04:00
}
return false
}
2016-02-21 01:10:05 +03:00
// IsReadmeFile reports whether name looks like a README file.
// The check is case-insensitive and accepts either the bare name "readme"
// or any name that starts with "readme." (e.g. README.md, Readme.txt).
func IsReadmeFile(name string) bool {
	name = strings.ToLower(name)
	return name == "readme" || strings.HasPrefix(name, "readme.")
}
2016-01-09 05:59:04 +03:00
// Patterns used to detect special references in rendered text.
var (
	// MentionPattern matches string that mentions someone, e.g. @Unknwon.
	// The match includes the single whitespace character preceding the '@', if any.
	MentionPattern = regexp.MustCompile(`(\s|^)@[0-9a-zA-Z-_\.]+`)

	// CommitPattern matches link to certain commit with or without trailing hash,
	// e.g. https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2
	CommitPattern = regexp.MustCompile(`(\s|^)https?.*commit/[0-9a-zA-Z]+(#+[0-9a-zA-Z-]*)?`)

	// IssueFullPattern matches link to an issue with or without trailing hash,
	// e.g. https://try.gogs.io/gogs/gogs/issues/4#issue-685
	IssueFullPattern = regexp.MustCompile(`(\s|^)https?.*issues/[0-9]+(#+[0-9a-zA-Z-]*)?`)

	// IssueNumericPattern matches string that references to a numeric issue, e.g. #1287.
	// The match includes the leading space or opening parenthesis, if any.
	IssueNumericPattern = regexp.MustCompile(`( |^|\()#[0-9]+\b`)

	// IssueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234.
	// The match includes the leading space or opening parenthesis, if any.
	IssueAlphanumericPattern = regexp.MustCompile(`( |^|\()[A-Z]{1,10}-[1-9][0-9]*\b`)

	// Sha1CurrentPattern matches string that represents a commit SHA (7 to 40
	// lowercase hex characters), e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
	Sha1CurrentPattern = regexp.MustCompile(`\b[0-9a-f]{7,40}\b`)
)
2016-07-15 19:36:39 +03:00
// FindAllMentions collects every match of MentionPattern in content and
// returns the mentioned user names with surrounding whitespace and the
// leading '@' removed.
func FindAllMentions(content string) []string {
	names := MentionPattern.FindAllString(content, -1)
	for idx, raw := range names {
		// A match may carry one leading whitespace character; drop it, then the '@'.
		trimmed := strings.TrimSpace(raw)
		names[idx] = trimmed[1:]
	}
	return names
}
2016-02-21 01:10:05 +03:00
// Renderer is a extended version of underlying render object.
type Renderer struct {
2014-10-05 01:15:22 +04:00
blackfriday . Renderer
2014-04-10 22:20:58 +04:00
urlPrefix string
}
2016-02-21 01:10:05 +03:00
// Link defines how formal links should be processed to produce corresponding HTML elements.
func ( r * Renderer ) Link ( out * bytes . Buffer , link [ ] byte , title [ ] byte , content [ ] byte ) {
2014-04-10 22:20:58 +04:00
if len ( link ) > 0 && ! isLink ( link ) {
2016-02-21 01:10:05 +03:00
if link [ 0 ] != '#' {
2016-01-09 05:59:04 +03:00
link = [ ] byte ( path . Join ( r . urlPrefix , string ( link ) ) )
}
}
r . Renderer . Link ( out , link , title , content )
}
2016-02-21 01:10:05 +03:00
// AutoLink defines how auto-detected links should be processed to produce corresponding HTML elements.
// Reference for kind: https://github.com/russross/blackfriday/blob/master/markdown.go#L69-L76
func ( r * Renderer ) AutoLink ( out * bytes . Buffer , link [ ] byte , kind int ) {
if kind != blackfriday . LINK_TYPE_NORMAL {
2016-01-09 05:59:04 +03:00
r . Renderer . AutoLink ( out , link , kind )
return
}
2016-02-21 01:10:05 +03:00
// Since this method could only possibly serve one link at a time,
// we do not need to find all.
2016-06-29 18:07:39 +03:00
if bytes . HasPrefix ( link , [ ] byte ( setting . AppUrl ) ) {
m := CommitPattern . Find ( link )
if m != nil {
m = bytes . TrimSpace ( m )
i := strings . Index ( string ( m ) , "commit/" )
j := strings . Index ( string ( m ) , "#" )
if j == - 1 {
j = len ( m )
}
out . WriteString ( fmt . Sprintf ( ` <code><a href="%s">%s</a></code> ` , m , base . ShortSha ( string ( m [ i + 7 : j ] ) ) ) )
return
2016-01-09 05:59:04 +03:00
}
2016-06-29 18:07:39 +03:00
m = IssueFullPattern . Find ( link )
if m != nil {
m = bytes . TrimSpace ( m )
i := strings . Index ( string ( m ) , "issues/" )
j := strings . Index ( string ( m ) , "#" )
if j == - 1 {
j = len ( m )
}
out . WriteString ( fmt . Sprintf ( ` <a href="%s">#%s</a> ` , m , base . ShortSha ( string ( m [ i + 7 : j ] ) ) ) )
return
2014-04-10 22:20:58 +04:00
}
}
2016-01-09 05:59:04 +03:00
r . Renderer . AutoLink ( out , link , kind )
2014-04-10 22:20:58 +04:00
}
2016-02-21 01:10:05 +03:00
// ListItem defines how list items should be processed to produce corresponding HTML elements.
func ( options * Renderer ) ListItem ( out * bytes . Buffer , text [ ] byte , flags int ) {
// Detect procedures to draw checkboxes.
2016-01-13 15:25:52 +03:00
switch {
case bytes . HasPrefix ( text , [ ] byte ( "[ ] " ) ) :
text = append ( [ ] byte ( ` <input type="checkbox" disabled="" /> ` ) , text [ 3 : ] ... )
case bytes . HasPrefix ( text , [ ] byte ( "[x] " ) ) :
text = append ( [ ] byte ( ` <input type="checkbox" disabled="" checked="" /> ` ) , text [ 3 : ] ... )
}
options . Renderer . ListItem ( out , text , flags )
}
2016-02-21 01:10:05 +03:00
// Note: this section exists to improve performance and reduce memory
// allocation at runtime, since these values are constant literals.
var (
	// svgSuffix and svgSuffixWithMark identify links to SVG images,
	// with and without a trailing query string.
	svgSuffix         = []byte(".svg")
	svgSuffixWithMark = []byte(".svg?")

	// Byte-slice forms of a space and its URL encoding.
	spaceBytes        = []byte(" ")
	spaceEncodedBytes = []byte("%20")

	// String forms of a space and its URL encoding.
	space        = " "
	spaceEncoded = "%20"
)
2016-02-21 01:10:05 +03:00
// Image defines how images should be processed to produce corresponding HTML elements.
func ( r * Renderer ) Image ( out * bytes . Buffer , link [ ] byte , title [ ] byte , alt [ ] byte ) {
2016-01-09 05:59:04 +03:00
prefix := strings . Replace ( r . urlPrefix , "/src/" , "/raw/" , 1 )
2015-11-20 13:37:51 +03:00
if len ( link ) > 0 {
if isLink ( link ) {
// External link with .svg suffix usually means CI status.
2016-02-21 01:10:05 +03:00
// TODO: define a keyword to allow non-svg images render as external link.
2015-11-20 13:37:51 +03:00
if bytes . HasSuffix ( link , svgSuffix ) || bytes . Contains ( link , svgSuffixWithMark ) {
2016-01-09 05:59:04 +03:00
r . Renderer . Image ( out , link , title , alt )
2015-11-20 13:37:51 +03:00
return
}
} else {
if link [ 0 ] != '/' {
prefix += "/"
}
2016-02-05 06:51:40 +03:00
link = bytes . Replace ( [ ] byte ( ( prefix + string ( link ) ) ) , spaceBytes , spaceEncodedBytes , - 1 )
fmt . Println ( 333 , string ( link ) )
2015-11-06 19:10:27 +03:00
}
2014-10-15 07:44:34 +04:00
}
2015-11-06 19:10:27 +03:00
out . WriteString ( ` <a href=" ` )
out . Write ( link )
out . WriteString ( ` "> ` )
2016-01-09 05:59:04 +03:00
r . Renderer . Image ( out , link , title , alt )
2015-11-06 19:10:27 +03:00
out . WriteString ( "</a>" )
2014-10-15 07:44:34 +04:00
}
2016-02-21 01:10:05 +03:00
// cutoutVerbosePrefix cutouts URL prefix including sub-path to
// return a clean unified string of request URL path.
2015-11-16 01:37:26 +03:00
func cutoutVerbosePrefix ( prefix string ) string {
2016-07-15 19:36:39 +03:00
if len ( prefix ) == 0 || prefix [ 0 ] != '/' {
return prefix
}
2015-11-16 01:37:26 +03:00
count := 0
for i := 0 ; i < len ( prefix ) ; i ++ {
if prefix [ i ] == '/' {
count ++
}
2016-01-31 23:38:20 +03:00
if count >= 3 + setting . AppSubUrlDepth {
2015-11-16 01:37:26 +03:00
return prefix [ : i ]
}
2015-11-16 00:22:25 +03:00
}
2015-11-16 01:37:26 +03:00
return prefix
}
2016-02-21 01:10:05 +03:00
// RenderIssueIndexPattern renders issue indexes to corresponding links.
2015-12-05 05:30:33 +03:00
func RenderIssueIndexPattern ( rawBytes [ ] byte , urlPrefix string , metas map [ string ] string ) [ ] byte {
2015-11-16 01:37:26 +03:00
urlPrefix = cutoutVerbosePrefix ( urlPrefix )
2016-04-23 01:28:08 +03:00
pattern := IssueNumericPattern
if metas [ "style" ] == ISSUE_NAME_STYLE_ALPHANUMERIC {
pattern = IssueAlphanumericPattern
}
ms := pattern . FindAll ( rawBytes , - 1 )
2014-04-10 22:20:58 +04:00
for _ , m := range ms {
2016-06-29 18:07:39 +03:00
if m [ 0 ] == ' ' || m [ 0 ] == '(' {
2016-04-23 01:28:08 +03:00
m = m [ 1 : ] // ignore leading space or opening parentheses
2015-03-24 01:32:24 +03:00
}
2016-04-23 01:28:08 +03:00
var link string
2015-12-05 05:30:33 +03:00
if metas == nil {
2016-04-23 01:28:08 +03:00
link = fmt . Sprintf ( ` <a href="%s/issues/%s">%s</a> ` , urlPrefix , m [ 1 : ] , m )
2015-12-05 05:30:33 +03:00
} else {
// Support for external issue tracker
2016-04-23 01:28:08 +03:00
if metas [ "style" ] == ISSUE_NAME_STYLE_ALPHANUMERIC {
metas [ "index" ] = string ( m )
} else {
metas [ "index" ] = string ( m [ 1 : ] )
}
link = fmt . Sprintf ( ` <a href="%s">%s</a> ` , com . Expand ( metas [ "format" ] , metas ) , m )
2015-12-05 05:30:33 +03:00
}
2016-04-23 01:28:08 +03:00
rawBytes = bytes . Replace ( rawBytes , m , [ ] byte ( link ) , 1 )
2015-12-05 05:30:33 +03:00
}
return rawBytes
}
2016-02-21 01:10:05 +03:00
// RenderSha1CurrentPattern renders SHA1 strings to corresponding links that assumes in the same repository.
func RenderSha1CurrentPattern ( rawBytes [ ] byte , urlPrefix string ) [ ] byte {
2016-08-10 21:35:06 +03:00
return [ ] byte ( Sha1CurrentPattern . ReplaceAllStringFunc ( string ( rawBytes [ : ] ) , func ( m string ) string {
return fmt . Sprintf ( ` <a href="%s/commit/%s"><code>%s</code></a> ` , urlPrefix , m , base . ShortSha ( string ( m ) ) )
} ) )
2016-02-21 01:10:05 +03:00
}
// RenderSpecialLink renders mentions, indexes and SHA1 strings to corresponding links.
2015-12-05 05:30:33 +03:00
func RenderSpecialLink ( rawBytes [ ] byte , urlPrefix string , metas map [ string ] string ) [ ] byte {
ms := MentionPattern . FindAll ( rawBytes , - 1 )
for _ , m := range ms {
m = bytes . TrimSpace ( m )
rawBytes = bytes . Replace ( rawBytes , m ,
[ ] byte ( fmt . Sprintf ( ` <a href="%s/%s">%s</a> ` , setting . AppSubUrl , m [ 1 : ] , m ) ) , - 1 )
}
rawBytes = RenderIssueIndexPattern ( rawBytes , urlPrefix , metas )
rawBytes = RenderSha1CurrentPattern ( rawBytes , urlPrefix )
return rawBytes
}
2016-02-21 01:10:05 +03:00
// RenderRaw renders Markdown to HTML without handling special links.
func RenderRaw ( body [ ] byte , urlPrefix string ) [ ] byte {
2014-04-10 22:20:58 +04:00
htmlFlags := 0
2014-10-05 01:15:22 +04:00
htmlFlags |= blackfriday . HTML_SKIP_STYLE
htmlFlags |= blackfriday . HTML_OMIT_CONTENTS
2016-02-21 01:10:05 +03:00
renderer := & Renderer {
2014-10-05 01:15:22 +04:00
Renderer : blackfriday . HtmlRenderer ( htmlFlags , "" , "" ) ,
2014-04-10 22:20:58 +04:00
urlPrefix : urlPrefix ,
}
// set up the parser
extensions := 0
2014-10-05 01:15:22 +04:00
extensions |= blackfriday . EXTENSION_NO_INTRA_EMPHASIS
extensions |= blackfriday . EXTENSION_TABLES
extensions |= blackfriday . EXTENSION_FENCED_CODE
extensions |= blackfriday . EXTENSION_AUTOLINK
extensions |= blackfriday . EXTENSION_STRIKETHROUGH
extensions |= blackfriday . EXTENSION_SPACE_HEADERS
extensions |= blackfriday . EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK
2015-09-01 15:32:02 +03:00
if setting . Markdown . EnableHardLineBreak {
extensions |= blackfriday . EXTENSION_HARD_LINE_BREAK
}
2014-10-05 01:15:22 +04:00
body = blackfriday . Markdown ( body , renderer , extensions )
2014-05-05 21:08:01 +04:00
return body
}
2015-11-20 09:52:11 +03:00
// Byte fragments PostProcess writes around a tag name to emit a closing tag.
var (
// leftAngleBracket is the opening sequence of a closing tag ("</"), despite its name.
leftAngleBracket = [ ] byte ( "</" )
// rightAngleBracket terminates the tag.
rightAngleBracket = [ ] byte ( ">" )
)
2015-11-20 13:37:51 +03:00
// noEndTags lists tag names that PostProcess does not push onto its stack of
// open tags, i.e. elements for which no closing tag is expected.
var noEndTags = [ ] string { "img" , "input" , "br" , "hr" }
2016-02-21 01:10:05 +03:00
// PostProcess treats different types of HTML differently,
2015-03-24 01:32:24 +03:00
// and only renders special links for plain text blocks.
2016-02-21 01:10:05 +03:00
func PostProcess ( rawHtml [ ] byte , urlPrefix string , metas map [ string ] string ) [ ] byte {
2015-11-20 13:37:51 +03:00
startTags := make ( [ ] string , 0 , 5 )
2015-03-09 07:14:50 +03:00
var buf bytes . Buffer
tokenizer := html . NewTokenizer ( bytes . NewReader ( rawHtml ) )
2015-11-20 13:37:51 +03:00
OUTER_LOOP :
2015-03-09 07:14:50 +03:00
for html . ErrorToken != tokenizer . Next ( ) {
token := tokenizer . Token ( )
switch token . Type {
2015-03-24 01:32:24 +03:00
case html . TextToken :
2015-12-05 05:30:33 +03:00
buf . Write ( RenderSpecialLink ( [ ] byte ( token . String ( ) ) , urlPrefix , metas ) )
2015-03-24 01:32:24 +03:00
case html . StartTagToken :
buf . WriteString ( token . String ( ) )
tagName := token . Data
// If this is an excluded tag, we skip processing all output until a close tag is encountered.
if strings . EqualFold ( "a" , tagName ) || strings . EqualFold ( "code" , tagName ) || strings . EqualFold ( "pre" , tagName ) {
2015-11-22 05:06:11 +03:00
stackNum := 1
2015-03-24 01:32:24 +03:00
for html . ErrorToken != tokenizer . Next ( ) {
token = tokenizer . Token ( )
2015-11-20 13:37:51 +03:00
2015-03-24 01:32:24 +03:00
// Copy the token to the output verbatim
buf . WriteString ( token . String ( ) )
2015-11-22 05:06:11 +03:00
if token . Type == html . StartTagToken {
stackNum ++
}
// If this is the close tag to the outer-most, we are done
2016-02-20 01:39:50 +03:00
if token . Type == html . EndTagToken {
2015-11-22 05:06:11 +03:00
stackNum --
2016-02-20 01:39:50 +03:00
if stackNum <= 0 && strings . EqualFold ( tagName , token . Data ) {
2015-11-22 05:06:11 +03:00
break
}
2015-03-09 07:14:50 +03:00
}
}
2015-11-20 13:37:51 +03:00
continue OUTER_LOOP
}
if ! com . IsSliceContainsStr ( noEndTags , token . Data ) {
startTags = append ( startTags , token . Data )
2015-03-24 01:32:24 +03:00
}
2015-03-09 07:14:50 +03:00
2015-11-20 09:52:11 +03:00
case html . EndTagToken :
2015-11-25 03:28:24 +03:00
if len ( startTags ) == 0 {
2015-11-25 03:29:35 +03:00
buf . WriteString ( token . String ( ) )
2015-11-25 03:28:24 +03:00
break
}
2015-11-20 09:52:11 +03:00
buf . Write ( leftAngleBracket )
2015-11-20 13:37:51 +03:00
buf . WriteString ( startTags [ len ( startTags ) - 1 ] )
2015-11-20 09:52:11 +03:00
buf . Write ( rightAngleBracket )
2015-11-20 13:37:51 +03:00
startTags = startTags [ : len ( startTags ) - 1 ]
2015-03-24 01:32:24 +03:00
default :
buf . WriteString ( token . String ( ) )
2015-03-09 07:14:50 +03:00
}
}
if io . EOF == tokenizer . Err ( ) {
return buf . Bytes ( )
}
2015-03-24 01:32:24 +03:00
// If we are not at the end of the input, then some other parsing error has occurred,
// so return the input verbatim.
2015-03-09 07:14:50 +03:00
return rawHtml
}
2015-03-24 01:32:24 +03:00
2016-02-21 01:10:05 +03:00
// Render renders Markdown to HTML with special links.
func Render ( rawBytes [ ] byte , urlPrefix string , metas map [ string ] string ) [ ] byte {
2016-03-15 23:28:55 +03:00
urlPrefix = strings . Replace ( urlPrefix , space , spaceEncoded , - 1 )
2016-02-21 01:10:05 +03:00
result := RenderRaw ( rawBytes , urlPrefix )
result = PostProcess ( result , urlPrefix , metas )
2015-03-24 01:32:24 +03:00
result = Sanitizer . SanitizeBytes ( result )
return result
}
2016-02-21 01:10:05 +03:00
// RenderString renders Markdown to HTML with special links and returns string type.
func RenderString ( raw , urlPrefix string , metas map [ string ] string ) string {
return string ( Render ( [ ] byte ( raw ) , urlPrefix , metas ) )
2015-03-24 01:32:24 +03:00
}