2014-04-10 22:20:58 +04:00
// Copyright 2014 The Gogs Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package base
import (
"bytes"
"fmt"
2015-03-09 07:14:50 +03:00
"io"
2014-04-10 22:20:58 +04:00
"net/http"
"path"
"path/filepath"
"regexp"
"strings"
2015-11-20 13:37:51 +03:00
"github.com/Unknwon/com"
2014-10-05 01:15:22 +04:00
"github.com/russross/blackfriday"
2015-03-24 01:32:24 +03:00
"golang.org/x/net/html"
2014-10-05 01:15:22 +04:00
2014-09-14 21:35:22 +04:00
"github.com/gogits/gogs/modules/setting"
2014-04-10 22:20:58 +04:00
)
// isletter reports whether c is an ASCII letter (a-z or A-Z).
func isletter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
// isalnum reports whether c is an ASCII digit or an ASCII letter.
func isalnum(c byte) bool {
	if '0' <= c && c <= '9' {
		return true
	}
	return isletter(c)
}
// validLinks lists the lower-case URL prefixes that isLink accepts.
var validLinks = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("mailto://"),
}
// isLink reports whether link starts (case-insensitively) with one of the
// prefixes in validLinks and the byte right after that prefix is an ASCII
// letter or digit.
func isLink(link []byte) bool {
	for _, scheme := range validLinks {
		n := len(scheme)
		// The link must be strictly longer than the prefix so link[n] exists.
		if len(link) <= n {
			continue
		}
		if !bytes.Equal(bytes.ToLower(link[:n]), scheme) {
			continue
		}
		if isalnum(link[n]) {
			return true
		}
	}
	return false
}
// IsMarkdownFile reports whether name has one of the recognised Markdown
// file extensions (.md, .markdown, .mdown, .mkd). The comparison ignores case.
func IsMarkdownFile(name string) bool {
	switch filepath.Ext(strings.ToLower(name)) {
	case ".md", ".markdown", ".mdown", ".mkd":
		return true
	}
	return false
}
// IsTextFile sniffs data with http.DetectContentType and returns the detected
// content type together with whether that type contains "text/".
func IsTextFile(data []byte) (string, bool) {
	contentType := http.DetectContentType(data)
	return contentType, strings.Contains(contentType, "text/")
}
// IsImageFile sniffs data with http.DetectContentType and returns the detected
// content type together with whether that type contains "image/".
func IsImageFile(data []byte) (string, bool) {
	contentType := http.DetectContentType(data)
	return contentType, strings.Contains(contentType, "image/")
}
2015-02-03 07:04:36 +03:00
// IsReadmeFile reports whether the given file name is considered a README
// file: either exactly "readme" or anything starting with "readme.".
// The comparison ignores case.
func IsReadmeFile(name string) bool {
	name = strings.ToLower(name)
	return name == "readme" || strings.HasPrefix(name, "readme.")
}
type CustomRender struct {
2014-10-05 01:15:22 +04:00
blackfriday . Renderer
2014-04-10 22:20:58 +04:00
urlPrefix string
}
// Link renders a hyperlink through the embedded renderer. A non-empty link
// that isLink rejects and that does not start with '#' is first joined onto
// urlPrefix with path.Join; every other link is passed through unchanged.
func (options *CustomRender) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
	relative := len(link) > 0 && !isLink(link)
	// Pure fragment links ("#section") are left untouched.
	if relative && link[0] != '#' {
		joined := path.Join(options.urlPrefix, string(link))
		link = []byte(joined)
	}

	options.Renderer.Link(out, link, title, content)
}
2015-11-20 13:37:51 +03:00
// svgSuffix matches links that end in ".svg".
var svgSuffix = []byte(".svg")

// svgSuffixWithMark matches links that contain ".svg?", i.e. an SVG path
// followed by a query string.
var svgSuffixWithMark = []byte(".svg?")
2014-10-15 07:44:34 +04:00
func ( options * CustomRender ) Image ( out * bytes . Buffer , link [ ] byte , title [ ] byte , alt [ ] byte ) {
2015-11-06 19:10:27 +03:00
prefix := strings . Replace ( options . urlPrefix , "/src/" , "/raw/" , 1 )
2015-11-20 13:37:51 +03:00
if len ( link ) > 0 {
if isLink ( link ) {
// External link with .svg suffix usually means CI status.
if bytes . HasSuffix ( link , svgSuffix ) || bytes . Contains ( link , svgSuffixWithMark ) {
options . Renderer . Image ( out , link , title , alt )
return
}
} else {
if link [ 0 ] != '/' {
prefix += "/"
}
link = [ ] byte ( prefix + string ( link ) )
2015-11-06 19:10:27 +03:00
}
2014-10-15 07:44:34 +04:00
}
2015-11-06 19:10:27 +03:00
out . WriteString ( ` <a href=" ` )
out . Write ( link )
out . WriteString ( ` "> ` )
2014-10-15 07:44:34 +04:00
options . Renderer . Image ( out , link , title , alt )
2015-11-06 19:10:27 +03:00
out . WriteString ( "</a>" )
2014-10-15 07:44:34 +04:00
}
2014-04-10 22:20:58 +04:00
var (
	// MentionPattern matches an @mention (letters, digits, '_' and '.')
	// at the start of the text or after a whitespace character. The leading
	// whitespace, if any, is part of the match.
	MentionPattern = regexp.MustCompile(`(\s|^)@[0-9a-zA-Z_\.]+`)

	// commitPattern matches a full http(s) URL containing "commit/<id>",
	// optionally followed by a '#' fragment.
	commitPattern = regexp.MustCompile(`(\s|^)https?.*commit/[0-9a-zA-Z]+(#+[0-9a-zA-Z-]*)?`)
	// issueFullPattern matches a full http(s) URL containing "issues/<n>",
	// optionally followed by a '#' fragment.
	issueFullPattern = regexp.MustCompile(`(\s|^)https?.*issues/[0-9]+(#+[0-9a-zA-Z-]*)?`)

	// issueIndexPattern matches a short issue reference such as "#123" at the
	// start of the text or after a non-word character. That preceding
	// character, if any, is part of the match.
	issueIndexPattern = regexp.MustCompile(`(^|\W)#[0-9]+\b`)

	// sha1CurrentPattern matches a standalone 40-character lower-case
	// hexadecimal string (a full SHA-1).
	sha1CurrentPattern = regexp.MustCompile(`\b[0-9a-f]{40}\b`)
)
2015-11-16 01:37:26 +03:00
// cutoutVerbosePrefix returns the part of prefix that precedes its third '/'.
// If prefix contains fewer than three slashes it is returned unchanged.
// For example "/user/repo/src/master" becomes "/user/repo".
func cutoutVerbosePrefix(prefix string) string {
	slashes := 0
	for i := 0; i < len(prefix); i++ {
		if prefix[i] != '/' {
			continue
		}
		slashes++
		if slashes == 3 {
			return prefix[:i]
		}
	}
	return prefix
}
2015-12-05 05:30:33 +03:00
func RenderIssueIndexPattern ( rawBytes [ ] byte , urlPrefix string , metas map [ string ] string ) [ ] byte {
2015-11-16 01:37:26 +03:00
urlPrefix = cutoutVerbosePrefix ( urlPrefix )
2014-12-01 01:30:21 +03:00
ms := issueIndexPattern . FindAll ( rawBytes , - 1 )
2014-04-10 22:20:58 +04:00
for _ , m := range ms {
2015-03-24 01:32:24 +03:00
var space string
m2 := m
2015-12-13 00:01:54 +03:00
if m2 [ 0 ] != '#' {
space = string ( m2 [ 0 ] )
2015-03-24 01:32:24 +03:00
m2 = m2 [ 1 : ]
}
2015-12-05 05:30:33 +03:00
if metas == nil {
rawBytes = bytes . Replace ( rawBytes , m , [ ] byte ( fmt . Sprintf ( ` %s<a href="%s/issues/%s">%s</a> ` ,
space , urlPrefix , m2 [ 1 : ] , m2 ) ) , 1 )
} else {
// Support for external issue tracker
metas [ "index" ] = string ( m2 [ 1 : ] )
rawBytes = bytes . Replace ( rawBytes , m , [ ] byte ( fmt . Sprintf ( ` %s<a href="%s">%s</a> ` ,
space , com . Expand ( metas [ "format" ] , metas ) , m2 ) ) , 1 )
}
}
return rawBytes
}
// RenderSpecialLink renders the special references found in rawBytes as HTML
// links: @mentions (linked to "<AppSubUrl>/<name>"), short issue references
// (via RenderIssueIndexPattern) and full SHA-1 hashes (via
// RenderSha1CurrentPattern).
//
// Mentions are replaced at the exact position of each match, so a name that
// is mentioned twice is wrapped once per occurrence and text that merely
// contains the same bytes (such as "x@name") is left alone. Whitespace that
// is part of a match is preserved around the generated anchor.
func RenderSpecialLink(rawBytes []byte, urlPrefix string, metas map[string]string) []byte {
	rawBytes = MentionPattern.ReplaceAllFunc(rawBytes, func(m []byte) []byte {
		mention := bytes.TrimSpace(m)
		start := bytes.Index(m, mention)
		lead, tail := m[:start], m[start+len(mention):]
		return []byte(fmt.Sprintf(`%s<a href="%s/%s">%s</a>%s`,
			lead, setting.AppSubUrl, mention[1:], mention, tail))
	})

	rawBytes = RenderIssueIndexPattern(rawBytes, urlPrefix, metas)
	rawBytes = RenderSha1CurrentPattern(rawBytes, urlPrefix)
	return rawBytes
}
func RenderSha1CurrentPattern ( rawBytes [ ] byte , urlPrefix string ) [ ] byte {
ms := sha1CurrentPattern . FindAll ( rawBytes , - 1 )
for _ , m := range ms {
rawBytes = bytes . Replace ( rawBytes , m , [ ] byte ( fmt . Sprintf (
` <a href="%s/commit/%s"><code>%s</code></a> ` , urlPrefix , m , ShortSha ( string ( m ) ) ) ) , - 1 )
2014-04-10 22:20:58 +04:00
}
return rawBytes
}
2014-05-05 21:08:01 +04:00
func RenderRawMarkdown ( body [ ] byte , urlPrefix string ) [ ] byte {
2014-04-10 22:20:58 +04:00
htmlFlags := 0
2014-10-05 01:15:22 +04:00
htmlFlags |= blackfriday . HTML_SKIP_STYLE
htmlFlags |= blackfriday . HTML_OMIT_CONTENTS
2014-04-10 22:20:58 +04:00
renderer := & CustomRender {
2014-10-05 01:15:22 +04:00
Renderer : blackfriday . HtmlRenderer ( htmlFlags , "" , "" ) ,
2014-04-10 22:20:58 +04:00
urlPrefix : urlPrefix ,
}
// set up the parser
extensions := 0
2014-10-05 01:15:22 +04:00
extensions |= blackfriday . EXTENSION_NO_INTRA_EMPHASIS
extensions |= blackfriday . EXTENSION_TABLES
extensions |= blackfriday . EXTENSION_FENCED_CODE
extensions |= blackfriday . EXTENSION_AUTOLINK
extensions |= blackfriday . EXTENSION_STRIKETHROUGH
extensions |= blackfriday . EXTENSION_SPACE_HEADERS
extensions |= blackfriday . EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK
2015-09-01 15:32:02 +03:00
if setting . Markdown . EnableHardLineBreak {
extensions |= blackfriday . EXTENSION_HARD_LINE_BREAK
}
2014-10-05 01:15:22 +04:00
body = blackfriday . Markdown ( body , renderer , extensions )
2014-05-05 21:08:01 +04:00
return body
}
2015-11-20 09:52:11 +03:00
// leftAngleBracket is the opening of an HTML end tag ("</"); despite its
// name it includes the slash.
var leftAngleBracket = []byte("</")

// rightAngleBracket is the closing '>' of an HTML tag.
var rightAngleBracket = []byte(">")
2015-11-20 13:37:51 +03:00
// noEndTags lists tag names that PostProcessMarkdown does not push onto its
// stack of open tags when it sees them as start tags.
var noEndTags = []string{
	"img",
	"input",
	"br",
	"hr",
}
2015-12-05 05:30:33 +03:00
// PreProcessMarkdown renders full links of commits, issues and pulls to shorter version.
//
// URLs matched by commitPattern become `<code><a href="URL">ID</a></code>`
// and URLs matched by issueFullPattern become `<a href="URL">#ID</a>`, where
// ID is the text after the last "commit/" or "issues/" (up to an optional '#'
// fragment) passed through ShortSha. urlPrefix is currently unused.
func PreProcessMarkdown(rawHTML []byte, urlPrefix string) []byte {
	rawHTML = shortenFullLinks(rawHTML, commitPattern, "commit/", `<code><a href="%s">%s</a></code>`)
	return shortenFullLinks(rawHTML, issueFullPattern, "issues/", `<a href="%s">#%s</a>`)
}

// shortenFullLinks replaces every match of pattern in raw with format, which
// receives the matched URL and its shortened identifier. Whitespace that is
// part of a match is preserved around the replacement.
func shortenFullLinks(raw []byte, pattern *regexp.Regexp, marker, format string) []byte {
	return pattern.ReplaceAllFunc(raw, func(m []byte) []byte {
		link := bytes.TrimSpace(m)
		start := bytes.Index(m, link)

		// Use the last marker: the identifier is what directly follows it.
		// Looking for '#' only after the marker keeps the slice bounds valid
		// even when the URL contains a '#' earlier on (e.g. "/#/commit/...").
		idStart := bytes.LastIndex(link, []byte(marker))
		if idStart == -1 {
			return m
		}
		id := link[idStart+len(marker):]
		if j := bytes.IndexByte(id, '#'); j != -1 {
			id = id[:j]
		}

		var out bytes.Buffer
		out.Write(m[:start])
		fmt.Fprintf(&out, format, link, ShortSha(string(id)))
		out.Write(m[start+len(link):])
		return out.Bytes()
	})
}
2015-03-24 01:32:24 +03:00
// PostProcessMarkdown treats different types of HTML differently,
// and only renders special links for plain text blocks.
2015-12-05 05:30:33 +03:00
func PostProcessMarkdown ( rawHtml [ ] byte , urlPrefix string , metas map [ string ] string ) [ ] byte {
2015-11-20 13:37:51 +03:00
startTags := make ( [ ] string , 0 , 5 )
2015-03-09 07:14:50 +03:00
var buf bytes . Buffer
tokenizer := html . NewTokenizer ( bytes . NewReader ( rawHtml ) )
2015-11-20 13:37:51 +03:00
OUTER_LOOP :
2015-03-09 07:14:50 +03:00
for html . ErrorToken != tokenizer . Next ( ) {
token := tokenizer . Token ( )
switch token . Type {
2015-03-24 01:32:24 +03:00
case html . TextToken :
2015-12-05 05:30:33 +03:00
buf . Write ( RenderSpecialLink ( [ ] byte ( token . String ( ) ) , urlPrefix , metas ) )
2015-03-24 01:32:24 +03:00
case html . StartTagToken :
buf . WriteString ( token . String ( ) )
tagName := token . Data
// If this is an excluded tag, we skip processing all output until a close tag is encountered.
if strings . EqualFold ( "a" , tagName ) || strings . EqualFold ( "code" , tagName ) || strings . EqualFold ( "pre" , tagName ) {
2015-11-22 05:06:11 +03:00
stackNum := 1
2015-03-24 01:32:24 +03:00
for html . ErrorToken != tokenizer . Next ( ) {
token = tokenizer . Token ( )
2015-11-20 13:37:51 +03:00
2015-03-24 01:32:24 +03:00
// Copy the token to the output verbatim
buf . WriteString ( token . String ( ) )
2015-11-22 05:06:11 +03:00
if token . Type == html . StartTagToken {
stackNum ++
}
// If this is the close tag to the outer-most, we are done
2015-11-20 13:37:51 +03:00
if token . Type == html . EndTagToken && strings . EqualFold ( tagName , token . Data ) {
2015-11-22 05:06:11 +03:00
stackNum --
if stackNum == 0 {
break
}
2015-03-09 07:14:50 +03:00
}
}
2015-11-20 13:37:51 +03:00
continue OUTER_LOOP
}
if ! com . IsSliceContainsStr ( noEndTags , token . Data ) {
startTags = append ( startTags , token . Data )
2015-03-24 01:32:24 +03:00
}
2015-03-09 07:14:50 +03:00
2015-11-20 09:52:11 +03:00
case html . EndTagToken :
2015-11-25 03:28:24 +03:00
if len ( startTags ) == 0 {
2015-11-25 03:29:35 +03:00
buf . WriteString ( token . String ( ) )
2015-11-25 03:28:24 +03:00
break
}
2015-11-20 09:52:11 +03:00
buf . Write ( leftAngleBracket )
2015-11-20 13:37:51 +03:00
buf . WriteString ( startTags [ len ( startTags ) - 1 ] )
2015-11-20 09:52:11 +03:00
buf . Write ( rightAngleBracket )
2015-11-20 13:37:51 +03:00
startTags = startTags [ : len ( startTags ) - 1 ]
2015-03-24 01:32:24 +03:00
default :
buf . WriteString ( token . String ( ) )
2015-03-09 07:14:50 +03:00
}
}
if io . EOF == tokenizer . Err ( ) {
return buf . Bytes ( )
}
2015-03-24 01:32:24 +03:00
// If we are not at the end of the input, then some other parsing error has occurred,
// so return the input verbatim.
2015-03-09 07:14:50 +03:00
return rawHtml
}
2015-03-24 01:32:24 +03:00
2015-12-05 05:30:33 +03:00
func RenderMarkdown ( rawBytes [ ] byte , urlPrefix string , metas map [ string ] string ) [ ] byte {
result := PreProcessMarkdown ( rawBytes , urlPrefix )
result = RenderRawMarkdown ( result , urlPrefix )
result = PostProcessMarkdown ( result , urlPrefix , metas )
2015-03-24 01:32:24 +03:00
result = Sanitizer . SanitizeBytes ( result )
return result
}
2015-12-05 05:30:33 +03:00
func RenderMarkdownString ( raw , urlPrefix string , metas map [ string ] string ) string {
return string ( RenderMarkdown ( [ ] byte ( raw ) , urlPrefix , metas ) )
2015-03-24 01:32:24 +03:00
}