2017-09-16 20:17:57 +03:00
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package markup
import (
"bytes"
2020-04-28 21:05:39 +03:00
"fmt"
2017-09-16 20:17:57 +03:00
"net/url"
"path"
"path/filepath"
"regexp"
"strings"
"code.gitea.io/gitea/modules/base"
2020-04-28 21:05:39 +03:00
"code.gitea.io/gitea/modules/emoji"
2019-08-14 11:04:55 +03:00
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
2019-12-31 04:53:28 +03:00
"code.gitea.io/gitea/modules/markup/common"
2019-10-14 01:29:10 +03:00
"code.gitea.io/gitea/modules/references"
2017-09-16 20:17:57 +03:00
"code.gitea.io/gitea/modules/setting"
2018-02-20 15:50:42 +03:00
"code.gitea.io/gitea/modules/util"
2017-09-16 20:17:57 +03:00
2019-08-23 19:40:30 +03:00
"github.com/unknwon/com"
2017-09-16 20:17:57 +03:00
"golang.org/x/net/html"
2018-02-27 10:09:18 +03:00
"golang.org/x/net/html/atom"
2019-03-27 14:15:23 +03:00
"mvdan.cc/xurls/v2"
2017-09-16 20:17:57 +03:00
)
// Issue name styles
const (
	// IssueNameStyleNumeric names the numeric issue naming style.
	// NOTE(review): not referenced in this part of the file — confirm usage.
	IssueNameStyleNumeric = "numeric"
	// IssueNameStyleAlphanumeric is the value of the "style" meta that
	// issueIndexPatternProcessor compares against to enable alphanumeric
	// issue references.
	IssueNameStyleAlphanumeric = "alphanumeric"
)
var (
	// NOTE: None of the regular expressions below perform any extra validation.
	// Thus a link is produced even if the linked entity does not exist.
	// While fast, this is also incorrect and leads to false positives.
	// TODO: fix invalid linking issue

	// sha1CurrentPattern matches a string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
	// Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length
	// so that abbreviated hash links can be used as well. This matches git and github usability.
	sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|\.(\s|$))`)

	// shortLinkPattern matches the short but difficult to parse [[name|link|arg=test]] syntax.
	// Group 1 is the content between the brackets, group 2 any word characters
	// that directly follow the closing brackets.
	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)

	// anySHA1Pattern splits a URL containing a full SHA into its parts:
	// group 1 is the 40 character hash, group 2 an optional sub-path and
	// group 3 an optional fragment (including the leading '#').
	anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`)

	// validLinksPattern matches text that starts with a URL scheme followed by "://".
	validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)

	// While this email regex is definitely not perfect and I'm sure you can come up
	// with edge cases, it is still accepted by the CommonMark specification, as
	// well as the HTML5 spec:
	//   http://spec.commonmark.org/0.28/#email-address
	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
	emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|\\.(\\s|$))")

	// blackfriday extensions create IDs like fn:user-content-footnote
	blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)

	// EmojiShortCodeRegex finds emoji by alias like :smile:
	EmojiShortCodeRegex = regexp.MustCompile(`\:[\w\+\-]+\:{1}`)
)
2019-10-14 01:29:10 +03:00
// keywordClass is the CSS class for action keywords (e.g. "closes: #1"). It
// is set on the <span> built by createKeyword.
const keywordClass = "issue-keyword"
2017-09-16 20:17:57 +03:00
// issueFullPattern is the regexp for full links to issues/pulls. It is nil
// until getIssueFullPattern compiles it on first use.
var issueFullPattern *regexp.Regexp
// IsLink reports whether link fits valid format.
func IsLink ( link [ ] byte ) bool {
return isLink ( link )
}
// isLink is the package-internal form of IsLink: it reports whether the given
// bytes are matched by validLinksPattern.
func isLink(link []byte) bool {
	matched := validLinksPattern.Match(link)
	return matched
}
2018-02-27 10:09:18 +03:00
// isLinkStr is the string counterpart of isLink: it reports whether link is
// matched by validLinksPattern.
func isLinkStr(link string) bool {
	matched := validLinksPattern.MatchString(link)
	return matched
}
2017-09-16 20:17:57 +03:00
// getIssueFullPattern returns the regexp that matches absolute links to issues
// and pull requests below setting.AppURL. The expression is compiled on the
// first call and cached in issueFullPattern.
//
// Submatch 1 is the issue index (optionally prefixed with an alphanumeric
// "ABC-" key), submatch 2 an optional query or fragment.
//
// NOTE(review): the lazy initialisation is not synchronised — confirm that the
// first call cannot happen concurrently.
func getIssueFullPattern() *regexp.Regexp {
	if issueFullPattern != nil {
		return issueFullPattern
	}

	appURL := setting.AppURL
	if len(appURL) > 0 && !strings.HasSuffix(appURL, "/") {
		appURL += "/"
	}
	// AppURL is literal text, not a pattern: escape it so that characters such
	// as '.' in the host name only match themselves.
	issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(appURL) +
		`\w+/\w+/(?:issues|pulls)/((?:\w{1,10}-)?[1-9][0-9]*)([\?|#]\S+.(\S+)?)?\b`)
	return issueFullPattern
}
2019-10-15 04:31:09 +03:00
// CustomLinkURLSchemes allows for additional schemes to be detected when parsing links within text
func CustomLinkURLSchemes ( schemes [ ] string ) {
schemes = append ( schemes , "http" , "https" )
withAuth := make ( [ ] string , 0 , len ( schemes ) )
validScheme := regexp . MustCompile ( ` ^[a-z]+$ ` )
for _ , s := range schemes {
if ! validScheme . MatchString ( s ) {
continue
}
without := false
for _ , sna := range xurls . SchemesNoAuthority {
if s == sna {
without = true
break
}
}
if without {
s += ":"
} else {
s += "://"
}
withAuth = append ( withAuth , s )
}
2019-12-31 04:53:28 +03:00
common . LinkRegex , _ = xurls . StrictMatchingScheme ( strings . Join ( withAuth , "|" ) )
2019-10-15 04:31:09 +03:00
}
2017-09-16 20:17:57 +03:00
// IsSameDomain checks if given url string has the same hostname as current
// Gitea instance. Paths starting with a single "/" are always considered to
// be on this instance. It returns false if either s or setting.AppURL cannot
// be parsed.
func IsSameDomain(s string) bool {
	// "//host/path" is a scheme-relative URL that may point to another host,
	// so only a single leading slash is treated as a local path; everything
	// else goes through the host comparison below.
	if strings.HasPrefix(s, "/") && !strings.HasPrefix(s, "//") {
		return true
	}

	appURL, err := url.Parse(setting.AppURL)
	if err != nil {
		return false
	}
	target, err := url.Parse(s)
	if err != nil {
		return false
	}
	return target.Host == appURL.Host
}
2018-02-27 10:09:18 +03:00
// postProcessError is the error type returned by the post-processing
// functions. It pairs a short description of the failing step with the
// underlying error.
type postProcessError struct {
	context string
	err     error
}

// Error implements the error interface. The message has the form
// "PostProcess: <context>, <cause>"; the cause is omitted when there is none.
func (p *postProcessError) Error() string {
	if p.err == nil {
		return "PostProcess: " + p.context
	}
	return "PostProcess: " + p.context + ", " + p.err.Error()
}

// Unwrap returns the underlying error so that errors.Is and errors.As can
// inspect the cause.
func (p *postProcessError) Unwrap() error {
	return p.err
}
// processor is one post-processing step. textNode calls every processor of
// the context, in order, with the text node being visited.
type processor func(ctx *postProcessCtx, node *html.Node)
// defaultProcessors is the processor list used by PostProcess. postProcess
// also falls back to it when a context has no processors set.
var defaultProcessors = []processor{
	fullIssuePatternProcessor,
	fullSha1PatternProcessor,
	shortLinkProcessor,
	linkProcessor,
	mentionProcessor,
	issueIndexPatternProcessor,
	sha1CurrentPatternProcessor,
	emailAddressProcessor,
	emojiProcessor,
	emojiShortCodeProcessor,
}
// postProcessCtx carries the settings for one post-processing run.
type postProcessCtx struct {
	// metas holds values consulted by the processors, e.g. "user", "repo",
	// "org", "teams", "format" and "style". issueIndexPatternProcessor also
	// writes the "index" key.
	metas map[string]string
	// urlPrefix is joined in front of relative image and short links.
	urlPrefix string
	// isWikiMarkdown makes relative links resolve below "wiki" (or
	// "wiki/raw" for images).
	isWikiMarkdown bool

	// processors used by this context.
	procs []processor
}
// PostProcess does the final required transformations to the passed raw HTML
// data, and ensures its validity. Transformations include: replacing links and
// emails with HTML links, parsing shortlinks in the format of [[Link]], like
// MediaWiki, linking issues in the format #ID, and mentions in the format
// @user, and others.
func PostProcess (
rawHTML [ ] byte ,
urlPrefix string ,
metas map [ string ] string ,
isWikiMarkdown bool ,
) ( [ ] byte , error ) {
// create the context from the parameters
ctx := & postProcessCtx {
2019-03-07 22:13:44 +03:00
metas : metas ,
urlPrefix : urlPrefix ,
isWikiMarkdown : isWikiMarkdown ,
procs : defaultProcessors ,
2018-02-27 10:09:18 +03:00
}
return ctx . postProcess ( rawHTML )
}
// commitMessageProcessors is the base processor list of RenderCommitMessage.
// Unlike defaultProcessors it does not contain shortLinkProcessor.
var commitMessageProcessors = []processor{
	fullIssuePatternProcessor,
	fullSha1PatternProcessor,
	linkProcessor,
	mentionProcessor,
	issueIndexPatternProcessor,
	sha1CurrentPatternProcessor,
	emailAddressProcessor,
	emojiProcessor,
	emojiShortCodeProcessor,
}
// RenderCommitMessage will use the same logic as PostProcess, but will disable
// the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is
// set, which changes every text node into a link to the passed default link.
func RenderCommitMessage (
rawHTML [ ] byte ,
urlPrefix , defaultLink string ,
metas map [ string ] string ,
) ( [ ] byte , error ) {
ctx := & postProcessCtx {
metas : metas ,
urlPrefix : urlPrefix ,
procs : commitMessageProcessors ,
}
if defaultLink != "" {
// we don't have to fear data races, because being
// commitMessageProcessors of fixed len and cap, every time we append
// something to it the slice is realloc+copied, so append always
// generates the slice ex-novo.
ctx . procs = append ( ctx . procs , genDefaultLinkProcessor ( defaultLink ) )
}
return ctx . postProcess ( rawHTML )
}
2019-09-10 12:03:30 +03:00
// commitMessageSubjectProcessors is the base processor list of
// RenderCommitMessageSubject. It contains neither shortLinkProcessor nor
// emailAddressProcessor.
var commitMessageSubjectProcessors = []processor{
	fullIssuePatternProcessor,
	fullSha1PatternProcessor,
	linkProcessor,
	mentionProcessor,
	issueIndexPatternProcessor,
	sha1CurrentPatternProcessor,
	emojiShortCodeProcessor,
	emojiProcessor,
}
// emojiProcessors is the processor list used by RenderEmoji: emoji shortcodes
// and emoji only.
var emojiProcessors = []processor{
	emojiShortCodeProcessor,
	emojiProcessor,
}
// RenderCommitMessageSubject will use the same logic as PostProcess and
// RenderCommitMessage, but will disable the shortLinkProcessor and
// emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set,
// which changes every text node into a link to the passed default link.
func RenderCommitMessageSubject (
rawHTML [ ] byte ,
urlPrefix , defaultLink string ,
metas map [ string ] string ,
) ( [ ] byte , error ) {
ctx := & postProcessCtx {
metas : metas ,
urlPrefix : urlPrefix ,
procs : commitMessageSubjectProcessors ,
}
if defaultLink != "" {
// we don't have to fear data races, because being
// commitMessageSubjectProcessors of fixed len and cap, every time we
// append something to it the slice is realloc+copied, so append always
// generates the slice ex-novo.
ctx . procs = append ( ctx . procs , genDefaultLinkProcessor ( defaultLink ) )
}
return ctx . postProcess ( rawHTML )
}
2020-12-03 13:50:47 +03:00
// RenderIssueTitle to process title on individual issue/pull page
func RenderIssueTitle (
rawHTML [ ] byte ,
urlPrefix string ,
metas map [ string ] string ,
) ( [ ] byte , error ) {
ctx := & postProcessCtx {
metas : metas ,
urlPrefix : urlPrefix ,
procs : [ ] processor {
issueIndexPatternProcessor ,
sha1CurrentPatternProcessor ,
emojiShortCodeProcessor ,
emojiProcessor ,
} ,
}
return ctx . postProcess ( rawHTML )
}
2019-03-12 05:23:34 +03:00
// RenderDescriptionHTML will use similar logic as PostProcess, but will
// use a single special linkProcessor.
func RenderDescriptionHTML (
rawHTML [ ] byte ,
urlPrefix string ,
metas map [ string ] string ,
) ( [ ] byte , error ) {
ctx := & postProcessCtx {
metas : metas ,
urlPrefix : urlPrefix ,
procs : [ ] processor {
descriptionLinkProcessor ,
2020-05-01 20:58:45 +03:00
emojiShortCodeProcessor ,
emojiProcessor ,
2019-03-12 05:23:34 +03:00
} ,
}
return ctx . postProcess ( rawHTML )
}
2020-04-28 21:05:39 +03:00
// RenderEmoji processes only emoji and emoji shortcodes. It is meant for
// places where the text is not already run through the normal markdown
// processor.
func RenderEmoji(rawHTML []byte) ([]byte, error) {
	ctx := postProcessCtx{procs: emojiProcessors}
	return ctx.postProcess(rawHTML)
}
2018-02-27 10:09:18 +03:00
// Opening and closing body tags. postProcess wraps its input in them before
// parsing and cuts them off again after rendering.
var (
	byteBodyTag        = []byte("<body>")
	byteBodyTagClosing = []byte("</body>")
)
func ( ctx * postProcessCtx ) postProcess ( rawHTML [ ] byte ) ( [ ] byte , error ) {
if ctx . procs == nil {
ctx . procs = defaultProcessors
}
// give a generous extra 50 bytes
res := make ( [ ] byte , 0 , len ( rawHTML ) + 50 )
res = append ( res , byteBodyTag ... )
res = append ( res , rawHTML ... )
res = append ( res , byteBodyTagClosing ... )
// parse the HTML
nodes , err := html . ParseFragment ( bytes . NewReader ( res ) , nil )
if err != nil {
return nil , & postProcessError { "invalid HTML" , err }
}
for _ , node := range nodes {
2020-02-28 07:16:05 +03:00
ctx . visitNode ( node , true )
2018-02-27 10:09:18 +03:00
}
// Create buffer in which the data will be placed again. We know that the
// length will be at least that of res; to spare a few alloc+copy, we
// reuse res, resetting its length to 0.
buf := bytes . NewBuffer ( res [ : 0 ] )
// Render everything to buf.
for _ , node := range nodes {
err = html . Render ( buf , node )
if err != nil {
return nil , & postProcessError { "error rendering processed HTML" , err }
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
}
// remove initial parts - because Render creates a whole HTML page.
res = buf . Bytes ( )
res = res [ bytes . Index ( res , byteBodyTag ) + len ( byteBodyTag ) : bytes . LastIndex ( res , byteBodyTagClosing ) ]
// Everything done successfully, return parsed data.
return res , nil
}
2020-02-28 07:16:05 +03:00
func ( ctx * postProcessCtx ) visitNode ( node * html . Node , visitText bool ) {
2019-12-24 01:38:50 +03:00
// Add user-content- to IDs if they don't already have them
for idx , attr := range node . Attr {
if attr . Key == "id" && ! ( strings . HasPrefix ( attr . Val , "user-content-" ) || blackfridayExtRegex . MatchString ( attr . Val ) ) {
node . Attr [ idx ] . Val = "user-content-" + attr . Val
}
2020-04-28 21:05:39 +03:00
if attr . Key == "class" && attr . Val == "emoji" {
visitText = false
}
2019-12-24 01:38:50 +03:00
}
2020-04-28 21:05:39 +03:00
2018-02-27 10:09:18 +03:00
// We ignore code, pre and already generated links.
switch node . Type {
case html . TextNode :
2020-02-28 07:16:05 +03:00
if visitText {
ctx . textNode ( node )
}
2018-02-27 10:09:18 +03:00
case html . ElementNode :
2020-02-28 07:16:05 +03:00
if node . Data == "img" {
attrs := node . Attr
for idx , attr := range attrs {
if attr . Key != "src" {
continue
}
link := [ ] byte ( attr . Val )
if len ( link ) > 0 && ! IsLink ( link ) {
prefix := ctx . urlPrefix
if ctx . isWikiMarkdown {
prefix = util . URLJoin ( prefix , "wiki" , "raw" )
}
prefix = strings . Replace ( prefix , "/src/" , "/media/" , 1 )
lnk := string ( link )
lnk = util . URLJoin ( prefix , lnk )
link = [ ] byte ( lnk )
}
node . Attr [ idx ] . Val = string ( link )
}
} else if node . Data == "a" {
visitText = false
} else if node . Data == "code" || node . Data == "pre" {
2018-02-27 10:09:18 +03:00
return
2020-04-24 16:22:36 +03:00
} else if node . Data == "i" {
for _ , attr := range node . Attr {
if attr . Key != "class" {
continue
}
classes := strings . Split ( attr . Val , " " )
for i , class := range classes {
if class == "icon" {
classes [ 0 ] , classes [ i ] = classes [ i ] , classes [ 0 ]
attr . Val = strings . Join ( classes , " " )
// Remove all children of icons
child := node . FirstChild
for child != nil {
node . RemoveChild ( child )
child = node . FirstChild
}
break
}
}
}
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
for n := node . FirstChild ; n != nil ; n = n . NextSibling {
2020-02-28 07:16:05 +03:00
ctx . visitNode ( n , visitText )
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
}
// ignore everything else
}
// textNode runs the passed node through various processors, in order to handle
// all kinds of special links handled by the post-processing.
func ( ctx * postProcessCtx ) textNode ( node * html . Node ) {
for _ , processor := range ctx . procs {
processor ( ctx , node )
}
2017-09-16 20:17:57 +03:00
}
2019-10-14 01:29:10 +03:00
// createKeyword builds the highlighted form of an action keyword: a <span>
// with class keywordClass that contains content as text.
func createKeyword(content string) *html.Node {
	keyword := &html.Node{
		Type: html.ElementNode,
		Data: atom.Span.String(),
		Attr: []html.Attribute{{Key: "class", Val: keywordClass}},
	}
	keyword.AppendChild(&html.Node{
		Type: html.TextNode,
		Data: content,
	})
	return keyword
}
2020-04-28 21:05:39 +03:00
// createEmoji builds a <span> that contains content as text. A non-empty
// class becomes the "class" attribute, a non-empty name the "aria-label"
// attribute.
func createEmoji(content, class, name string) *html.Node {
	attrs := make([]html.Attribute, 0, 2)
	if class != "" {
		attrs = append(attrs, html.Attribute{Key: "class", Val: class})
	}
	if name != "" {
		attrs = append(attrs, html.Attribute{Key: "aria-label", Val: name})
	}

	emojiSpan := &html.Node{
		Type: html.ElementNode,
		Data: atom.Span.String(),
		Attr: attrs,
	}
	emojiSpan.AppendChild(&html.Node{
		Type: html.TextNode,
		Data: content,
	})
	return emojiSpan
}
func createCustomEmoji ( alias , class string ) * html . Node {
span := & html . Node {
Type : html . ElementNode ,
Data : atom . Span . String ( ) ,
Attr : [ ] html . Attribute { } ,
}
if class != "" {
span . Attr = append ( span . Attr , html . Attribute { Key : "class" , Val : class } )
span . Attr = append ( span . Attr , html . Attribute { Key : "aria-label" , Val : alias } )
}
img := & html . Node {
Type : html . ElementNode ,
DataAtom : atom . Img ,
Data : "img" ,
Attr : [ ] html . Attribute { } ,
}
if class != "" {
2020-08-24 04:44:53 +03:00
img . Attr = append ( img . Attr , html . Attribute { Key : "alt" , Val : fmt . Sprintf ( ` :%s: ` , alias ) } )
2020-04-28 21:05:39 +03:00
img . Attr = append ( img . Attr , html . Attribute { Key : "src" , Val : fmt . Sprintf ( ` %s/img/emoji/%s.png ` , setting . StaticURLPrefix , alias ) } )
}
span . AppendChild ( img )
return span
}
2019-09-10 12:03:30 +03:00
func createLink ( href , content , class string ) * html . Node {
2019-04-09 06:18:48 +03:00
a := & html . Node {
Type : html . ElementNode ,
Data : atom . A . String ( ) ,
Attr : [ ] html . Attribute { { Key : "href" , Val : href } } ,
}
2019-09-10 12:03:30 +03:00
if class != "" {
a . Attr = append ( a . Attr , html . Attribute { Key : "class" , Val : class } )
}
2019-04-09 06:18:48 +03:00
text := & html . Node {
2018-02-27 10:09:18 +03:00
Type : html . TextNode ,
Data : content ,
}
2019-04-09 06:18:48 +03:00
a . AppendChild ( text )
return a
}
2019-09-10 12:03:30 +03:00
func createCodeLink ( href , content , class string ) * html . Node {
2019-04-09 06:18:48 +03:00
a := & html . Node {
Type : html . ElementNode ,
Data : atom . A . String ( ) ,
Attr : [ ] html . Attribute { { Key : "href" , Val : href } } ,
}
2019-09-10 12:03:30 +03:00
if class != "" {
a . Attr = append ( a . Attr , html . Attribute { Key : "class" , Val : class } )
}
2019-04-09 06:18:48 +03:00
text := & html . Node {
Type : html . TextNode ,
Data : content ,
2017-09-16 20:17:57 +03:00
}
2019-04-09 06:18:48 +03:00
code := & html . Node {
Type : html . ElementNode ,
Data : atom . Code . String ( ) ,
2019-06-19 01:31:31 +03:00
Attr : [ ] html . Attribute { { Key : "class" , Val : "nohighlight" } } ,
2019-04-09 06:18:48 +03:00
}
code . AppendChild ( text )
a . AppendChild ( code )
return a
2017-09-16 20:17:57 +03:00
}
2019-10-14 01:29:10 +03:00
// replaceContent takes text node, and in its content it replaces a section of
// it with the specified newNode.
2018-02-27 10:09:18 +03:00
func replaceContent ( node * html . Node , i , j int , newNode * html . Node ) {
2019-10-14 01:29:10 +03:00
replaceContentList ( node , i , j , [ ] * html . Node { newNode } )
}
// replaceContentList takes text node, and in its content it replaces a section of
// it with the specified newNodes. An example to visualize how this can work can
// be found here: https://play.golang.org/p/5zP8NnHZ03s
func replaceContentList ( node * html . Node , i , j int , newNodes [ ] * html . Node ) {
2018-02-27 10:09:18 +03:00
// get the data before and after the match
before := node . Data [ : i ]
after := node . Data [ j : ]
// Replace in the current node the text, so that it is only what it is
// supposed to have.
node . Data = before
// Get the current next sibling, before which we place the replaced data,
// and after that we place the new text node.
nextSibling := node . NextSibling
2019-10-14 01:29:10 +03:00
for _ , n := range newNodes {
node . Parent . InsertBefore ( n , nextSibling )
}
2018-02-27 10:09:18 +03:00
if after != "" {
node . Parent . InsertBefore ( & html . Node {
Type : html . TextNode ,
Data : after ,
} , nextSibling )
2017-09-16 20:17:57 +03:00
}
}
2019-11-24 19:34:44 +03:00
func mentionProcessor ( ctx * postProcessCtx , node * html . Node ) {
2019-10-14 01:29:10 +03:00
// We replace only the first mention; other mentions will be addressed later
found , loc := references . FindFirstMentionBytes ( [ ] byte ( node . Data ) )
if ! found {
2018-02-27 10:09:18 +03:00
return
}
2019-10-14 01:29:10 +03:00
mention := node . Data [ loc . Start : loc . End ]
2019-11-24 19:34:44 +03:00
var teams string
teams , ok := ctx . metas [ "teams" ]
if ok && strings . Contains ( teams , "," + strings . ToLower ( mention [ 1 : ] ) + "," ) {
replaceContent ( node , loc . Start , loc . End , createLink ( util . URLJoin ( setting . AppURL , "org" , ctx . metas [ "org" ] , "teams" , mention [ 1 : ] ) , mention , "mention" ) )
} else {
replaceContent ( node , loc . Start , loc . End , createLink ( util . URLJoin ( setting . AppURL , mention [ 1 : ] ) , mention , "mention" ) )
}
2018-02-27 10:09:18 +03:00
}
// shortLinkProcessor handles [[...]] short links and wraps the result in a
// link (shortLinkProcessorFull with noLink set to false).
func shortLinkProcessor(ctx *postProcessCtx, node *html.Node) {
	const noLink = false
	shortLinkProcessorFull(ctx, node, noLink)
}
func shortLinkProcessorFull ( ctx * postProcessCtx , node * html . Node , noLink bool ) {
m := shortLinkPattern . FindStringSubmatchIndex ( node . Data )
if m == nil {
return
}
2017-09-16 20:17:57 +03:00
2018-02-27 10:09:18 +03:00
content := node . Data [ m [ 2 ] : m [ 3 ] ]
tail := node . Data [ m [ 4 ] : m [ 5 ] ]
props := make ( map [ string ] string )
// MediaWiki uses [[link|text]], while GitHub uses [[text|link]]
// It makes page handling terrible, but we prefer GitHub syntax
// And fall back to MediaWiki only when it is obvious from the look
// Of text and link contents
sl := strings . Split ( content , "|" )
for _ , v := range sl {
if equalPos := strings . IndexByte ( v , '=' ) ; equalPos == - 1 {
// There is no equal in this argument; this is a mandatory arg
if props [ "name" ] == "" {
if isLinkStr ( v ) {
// If we clearly see it is a link, we save it so
// But first we need to ensure, that if both mandatory args provided
// look like links, we stick to GitHub syntax
if props [ "link" ] != "" {
props [ "name" ] = props [ "link" ]
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
props [ "link" ] = strings . TrimSpace ( v )
} else {
props [ "name" ] = v
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
} else {
props [ "link" ] = strings . TrimSpace ( v )
}
} else {
// There is an equal; optional argument.
sep := strings . IndexByte ( v , '=' )
key , val := v [ : sep ] , html . UnescapeString ( v [ sep + 1 : ] )
// When parsing HTML, x/net/html will change all quotes which are
// not used for syntax into UTF-8 quotes. So checking val[0] won't
// be enough, since that only checks a single byte.
2020-12-04 05:01:42 +03:00
if len ( val ) > 1 {
if ( strings . HasPrefix ( val , "“" ) && strings . HasSuffix ( val , "”" ) ) ||
( strings . HasPrefix ( val , "‘ " ) && strings . HasSuffix ( val , "’ " ) ) {
const lenQuote = len ( "‘ " )
val = val [ lenQuote : len ( val ) - lenQuote ]
} else if ( strings . HasPrefix ( val , "\"" ) && strings . HasSuffix ( val , "\"" ) ) ||
( strings . HasPrefix ( val , "'" ) && strings . HasSuffix ( val , "'" ) ) {
val = val [ 1 : len ( val ) - 1 ]
} else if strings . HasPrefix ( val , "'" ) && strings . HasSuffix ( val , "’ " ) {
const lenQuote = len ( "‘ " )
val = val [ 1 : len ( val ) - lenQuote ]
}
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
props [ key ] = val
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
}
2017-09-16 20:17:57 +03:00
2018-02-27 10:09:18 +03:00
var name , link string
if props [ "link" ] != "" {
link = props [ "link" ]
} else if props [ "name" ] != "" {
link = props [ "name" ]
}
if props [ "title" ] != "" {
name = props [ "title" ]
} else if props [ "name" ] != "" {
name = props [ "name" ]
} else {
name = link
}
name += tail
image := false
2019-07-23 21:50:39 +03:00
switch ext := filepath . Ext ( link ) ; ext {
2018-02-27 10:09:18 +03:00
// fast path: empty string, ignore
case "" :
break
case ".jpg" , ".jpeg" , ".png" , ".tif" , ".tiff" , ".webp" , ".gif" , ".bmp" , ".ico" , ".svg" :
image = true
}
childNode := & html . Node { }
linkNode := & html . Node {
FirstChild : childNode ,
LastChild : childNode ,
Type : html . ElementNode ,
Data : "a" ,
DataAtom : atom . A ,
}
childNode . Parent = linkNode
absoluteLink := isLinkStr ( link )
if ! absoluteLink {
2018-03-05 21:39:12 +03:00
if image {
2020-10-11 23:27:20 +03:00
link = strings . ReplaceAll ( link , " " , "+" )
2018-03-05 21:39:12 +03:00
} else {
2020-10-11 23:27:20 +03:00
link = strings . ReplaceAll ( link , " " , "-" )
2018-03-05 21:39:12 +03:00
}
2018-06-15 15:42:49 +03:00
if ! strings . Contains ( link , "/" ) {
link = url . PathEscape ( link )
}
2018-02-27 10:09:18 +03:00
}
urlPrefix := ctx . urlPrefix
if image {
if ! absoluteLink {
if IsSameDomain ( urlPrefix ) {
urlPrefix = strings . Replace ( urlPrefix , "/src/" , "/raw/" , 1 )
}
if ctx . isWikiMarkdown {
link = util . URLJoin ( "wiki" , "raw" , link )
}
link = util . URLJoin ( urlPrefix , link )
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
title := props [ "title" ]
if title == "" {
title = props [ "alt" ]
}
if title == "" {
2019-07-23 21:50:39 +03:00
title = path . Base ( name )
2018-02-27 10:09:18 +03:00
}
alt := props [ "alt" ]
if alt == "" {
alt = name
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
// make the childNode an image - if we can, we also place the alt
childNode . Type = html . ElementNode
childNode . Data = "img"
childNode . DataAtom = atom . Img
childNode . Attr = [ ] html . Attribute {
{ Key : "src" , Val : link } ,
{ Key : "title" , Val : title } ,
{ Key : "alt" , Val : alt } ,
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
if alt == "" {
childNode . Attr = childNode . Attr [ : 2 ]
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
} else {
if ! absoluteLink {
if ctx . isWikiMarkdown {
2018-02-20 15:50:42 +03:00
link = util . URLJoin ( "wiki" , link )
2017-09-16 20:17:57 +03:00
}
2018-02-20 15:50:42 +03:00
link = util . URLJoin ( urlPrefix , link )
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
childNode . Type = html . TextNode
childNode . Data = name
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
if noLink {
linkNode = childNode
} else {
linkNode . Attr = [ ] html . Attribute { { Key : "href" , Val : link } }
}
replaceContent ( node , m [ 0 ] , m [ 1 ] , linkNode )
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
func fullIssuePatternProcessor ( ctx * postProcessCtx , node * html . Node ) {
2019-04-12 08:53:34 +03:00
if ctx . metas == nil {
return
}
2018-02-27 10:09:18 +03:00
m := getIssueFullPattern ( ) . FindStringSubmatchIndex ( node . Data )
if m == nil {
return
}
link := node . Data [ m [ 0 ] : m [ 1 ] ]
id := "#" + node . Data [ m [ 2 ] : m [ 3 ] ]
2019-04-12 08:53:34 +03:00
// extract repo and org name from matched link like
// http://localhost:3000/gituser/myrepo/issues/1
linkParts := strings . Split ( path . Clean ( link ) , "/" )
matchOrg := linkParts [ len ( linkParts ) - 4 ]
matchRepo := linkParts [ len ( linkParts ) - 3 ]
if matchOrg == ctx . metas [ "user" ] && matchRepo == ctx . metas [ "repo" ] {
// TODO if m[4]:m[5] is not nil, then link is to a comment,
// and we should indicate that in the text somehow
2020-01-20 07:39:21 +03:00
replaceContent ( node , m [ 0 ] , m [ 1 ] , createLink ( link , id , "ref-issue" ) )
2019-04-12 08:53:34 +03:00
} else {
orgRepoID := matchOrg + "/" + matchRepo + id
2020-01-20 07:39:21 +03:00
replaceContent ( node , m [ 0 ] , m [ 1 ] , createLink ( link , orgRepoID , "ref-issue" ) )
2019-04-12 08:53:34 +03:00
}
2018-02-27 10:09:18 +03:00
}
2017-09-16 20:17:57 +03:00
2018-02-27 10:09:18 +03:00
func issueIndexPatternProcessor ( ctx * postProcessCtx , node * html . Node ) {
2019-04-12 08:53:34 +03:00
if ctx . metas == nil {
return
}
2019-10-14 01:29:10 +03:00
var (
found bool
ref * references . RenderizableReference
)
2019-12-01 16:57:05 +03:00
_ , exttrack := ctx . metas [ "format" ]
alphanum := ctx . metas [ "style" ] == IssueNameStyleAlphanumeric
// Repos with external issue trackers might still need to reference local PRs
// We need to concern with the first one that shows up in the text, whichever it is
found , ref = references . FindRenderizableReferenceNumeric ( node . Data , exttrack && alphanum )
if exttrack && alphanum {
if found2 , ref2 := references . FindRenderizableReferenceAlphanumeric ( node . Data ) ; found2 {
if ! found || ref2 . RefLocation . Start < ref . RefLocation . Start {
found = true
ref = ref2
}
}
2017-09-16 20:17:57 +03:00
}
2019-10-14 01:29:10 +03:00
if ! found {
2018-02-27 10:09:18 +03:00
return
}
2019-04-12 08:53:34 +03:00
2018-02-27 10:09:18 +03:00
var link * html . Node
2019-10-14 01:29:10 +03:00
reftext := node . Data [ ref . RefLocation . Start : ref . RefLocation . End ]
2019-12-01 16:57:05 +03:00
if exttrack && ! ref . IsPull {
2019-10-14 01:29:10 +03:00
ctx . metas [ "index" ] = ref . Issue
2020-01-20 07:39:21 +03:00
link = createLink ( com . Expand ( ctx . metas [ "format" ] , ctx . metas ) , reftext , "ref-issue" )
2019-04-12 08:53:34 +03:00
} else {
2019-12-01 16:57:05 +03:00
// Path determines the type of link that will be rendered. It's unknown at this point whether
// the linked item is actually a PR or an issue. Luckily it's of no real consequence because
// Gitea will redirect on click as appropriate.
path := "issues"
if ref . IsPull {
path = "pulls"
}
if ref . Owner == "" {
2020-01-20 07:39:21 +03:00
link = createLink ( util . URLJoin ( setting . AppURL , ctx . metas [ "user" ] , ctx . metas [ "repo" ] , path , ref . Issue ) , reftext , "ref-issue" )
2019-12-01 16:57:05 +03:00
} else {
2020-01-20 07:39:21 +03:00
link = createLink ( util . URLJoin ( setting . AppURL , ref . Owner , ref . Name , path , ref . Issue ) , reftext , "ref-issue" )
2019-12-01 16:57:05 +03:00
}
2018-02-27 10:09:18 +03:00
}
2017-09-16 20:17:57 +03:00
2019-10-14 01:29:10 +03:00
if ref . Action == references . XRefActionNone {
replaceContent ( node , ref . RefLocation . Start , ref . RefLocation . End , link )
2018-02-27 10:09:18 +03:00
return
}
2019-11-18 16:13:07 +03:00
// Decorate action keywords if actionable
var keyword * html . Node
2019-12-01 16:57:05 +03:00
if references . IsXrefActionable ( ref , exttrack , alphanum ) {
2019-11-18 16:13:07 +03:00
keyword = createKeyword ( node . Data [ ref . ActionLocation . Start : ref . ActionLocation . End ] )
} else {
keyword = & html . Node {
Type : html . TextNode ,
Data : node . Data [ ref . ActionLocation . Start : ref . ActionLocation . End ] ,
}
}
2019-10-14 01:29:10 +03:00
spaces := & html . Node {
Type : html . TextNode ,
Data : node . Data [ ref . ActionLocation . End : ref . RefLocation . Start ] ,
}
replaceContentList ( node , ref . ActionLocation . Start , ref . RefLocation . End , [ ] * html . Node { keyword , spaces , link } )
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
// fullSha1PatternProcessor renders SHA containing URLs
func fullSha1PatternProcessor ( ctx * postProcessCtx , node * html . Node ) {
2019-04-16 10:53:57 +03:00
if ctx . metas == nil {
return
}
2018-02-27 10:09:18 +03:00
m := anySHA1Pattern . FindStringSubmatchIndex ( node . Data )
if m == nil {
return
}
2019-04-06 21:28:45 +03:00
2018-02-27 10:09:18 +03:00
urlFull := node . Data [ m [ 0 ] : m [ 1 ] ]
2019-04-06 21:28:45 +03:00
text := base . ShortSha ( node . Data [ m [ 2 ] : m [ 3 ] ] )
2017-09-16 20:17:57 +03:00
2019-04-06 21:28:45 +03:00
// 3rd capture group matches a optional path
subpath := ""
if m [ 5 ] > 0 {
subpath = node . Data [ m [ 4 ] : m [ 5 ] ]
}
2017-09-16 20:17:57 +03:00
2019-04-06 21:28:45 +03:00
// 4th capture group matches a optional url hash
hash := ""
2018-02-27 10:09:18 +03:00
if m [ 7 ] > 0 {
2019-04-06 21:28:45 +03:00
hash = node . Data [ m [ 6 ] : m [ 7 ] ] [ 1 : ]
2018-02-27 10:09:18 +03:00
}
2019-04-06 21:28:45 +03:00
start := m [ 0 ]
end := m [ 1 ]
// If url ends in '.', it's very likely that it is not part of the
// actual url but used to finish a sentence.
if strings . HasSuffix ( urlFull , "." ) {
end --
urlFull = urlFull [ : len ( urlFull ) - 1 ]
if hash != "" {
hash = hash [ : len ( hash ) - 1 ]
} else if subpath != "" {
subpath = subpath [ : len ( subpath ) - 1 ]
}
2018-02-27 10:09:18 +03:00
}
2017-09-16 20:17:57 +03:00
2019-04-06 21:28:45 +03:00
if subpath != "" {
text += subpath
2018-02-27 10:09:18 +03:00
}
2019-04-06 21:28:45 +03:00
if hash != "" {
text += " (" + hash + ")"
2018-02-27 10:09:18 +03:00
}
2017-09-16 20:17:57 +03:00
2019-09-10 12:03:30 +03:00
replaceContent ( node , start , end , createCodeLink ( urlFull , text , "commit" ) )
2018-02-27 10:09:18 +03:00
}
2017-09-16 20:17:57 +03:00
2020-04-28 21:05:39 +03:00
// emojiShortCodeProcessor for rendering text like :smile: into emoji
func emojiShortCodeProcessor ( ctx * postProcessCtx , node * html . Node ) {
m := EmojiShortCodeRegex . FindStringSubmatchIndex ( node . Data )
if m == nil {
return
}
alias := node . Data [ m [ 0 ] : m [ 1 ] ]
2020-10-11 23:27:20 +03:00
alias = strings . ReplaceAll ( alias , ":" , "" )
2020-04-28 21:05:39 +03:00
converted := emoji . FromAlias ( alias )
if converted == nil {
// check if this is a custom reaction
s := strings . Join ( setting . UI . Reactions , " " ) + "gitea"
if strings . Contains ( s , alias ) {
replaceContent ( node , m [ 0 ] , m [ 1 ] , createCustomEmoji ( alias , "emoji" ) )
return
}
return
}
replaceContent ( node , m [ 0 ] , m [ 1 ] , createEmoji ( converted . Emoji , "emoji" , converted . Description ) )
}
// emoji processor to match emoji and add emoji class
func emojiProcessor ( ctx * postProcessCtx , node * html . Node ) {
2020-05-29 19:08:36 +03:00
m := emoji . FindEmojiSubmatchIndex ( node . Data )
2020-04-28 21:05:39 +03:00
if m == nil {
return
}
codepoint := node . Data [ m [ 0 ] : m [ 1 ] ]
val := emoji . FromCode ( codepoint )
if val != nil {
replaceContent ( node , m [ 0 ] , m [ 1 ] , createEmoji ( codepoint , "emoji" , val . Description ) )
}
}
2018-02-27 10:09:18 +03:00
// sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that
// are assumed to be in the same repository.
func sha1CurrentPatternProcessor ( ctx * postProcessCtx , node * html . Node ) {
2019-08-14 11:04:55 +03:00
if ctx . metas == nil || ctx . metas [ "user" ] == "" || ctx . metas [ "repo" ] == "" || ctx . metas [ "repoPath" ] == "" {
return
}
2018-02-27 10:09:18 +03:00
m := sha1CurrentPattern . FindStringSubmatchIndex ( node . Data )
if m == nil {
return
}
hash := node . Data [ m [ 2 ] : m [ 3 ] ]
// The regex does not lie, it matches the hash pattern.
// However, a regex cannot know if a hash actually exists or not.
// We could assume that a SHA1 hash should probably contain alphas AND numerics
// but that is not always the case.
// Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash
// as used by git and github for linking and thus we have to do similar.
2019-08-14 11:04:55 +03:00
// Because of this, we check to make sure that a matched hash is actually
// a commit in the repository before making it a link.
if _ , err := git . NewCommand ( "rev-parse" , "--verify" , hash ) . RunInDirBytes ( ctx . metas [ "repoPath" ] ) ; err != nil {
if ! strings . Contains ( err . Error ( ) , "fatal: Needed a single revision" ) {
log . Debug ( "sha1CurrentPatternProcessor git rev-parse: %v" , err )
}
return
}
2018-02-27 10:09:18 +03:00
replaceContent ( node , m [ 2 ] , m [ 3 ] ,
2019-09-10 12:03:30 +03:00
createCodeLink ( util . URLJoin ( setting . AppURL , ctx . metas [ "user" ] , ctx . metas [ "repo" ] , "commit" , hash ) , base . ShortSha ( hash ) , "commit" ) )
2018-02-27 10:09:18 +03:00
}
2017-09-16 20:17:57 +03:00
2018-02-27 10:09:18 +03:00
// emailAddressProcessor replaces raw email addresses with a mailto: link.
func emailAddressProcessor ( ctx * postProcessCtx , node * html . Node ) {
2019-04-07 14:18:16 +03:00
m := emailRegex . FindStringSubmatchIndex ( node . Data )
2018-02-27 10:09:18 +03:00
if m == nil {
return
2017-09-16 20:17:57 +03:00
}
2019-04-07 14:18:16 +03:00
mail := node . Data [ m [ 2 ] : m [ 3 ] ]
2019-09-10 12:03:30 +03:00
replaceContent ( node , m [ 2 ] , m [ 3 ] , createLink ( "mailto:" + mail , mail , "mailto" ) )
2018-02-27 10:09:18 +03:00
}
2017-09-16 20:17:57 +03:00
2018-02-27 10:09:18 +03:00
// linkProcessor creates links for any HTTP or HTTPS URL not captured by
// markdown.
func linkProcessor ( ctx * postProcessCtx , node * html . Node ) {
2019-12-31 04:53:28 +03:00
m := common . LinkRegex . FindStringIndex ( node . Data )
2018-02-27 10:09:18 +03:00
if m == nil {
return
2017-09-16 20:17:57 +03:00
}
2018-02-27 10:09:18 +03:00
uri := node . Data [ m [ 0 ] : m [ 1 ] ]
2019-09-10 12:03:30 +03:00
replaceContent ( node , m [ 0 ] , m [ 1 ] , createLink ( uri , uri , "link" ) )
2018-02-27 10:09:18 +03:00
}
func genDefaultLinkProcessor ( defaultLink string ) processor {
return func ( ctx * postProcessCtx , node * html . Node ) {
ch := & html . Node {
Parent : node ,
Type : html . TextNode ,
Data : node . Data ,
}
2017-09-16 20:17:57 +03:00
2018-02-27 10:09:18 +03:00
node . Type = html . ElementNode
node . Data = "a"
node . DataAtom = atom . A
2019-09-10 12:03:30 +03:00
node . Attr = [ ] html . Attribute {
{ Key : "href" , Val : defaultLink } ,
{ Key : "class" , Val : "default-link" } ,
}
2018-02-27 10:09:18 +03:00
node . FirstChild , node . LastChild = ch , ch
}
2017-09-16 20:17:57 +03:00
}
2019-03-12 05:23:34 +03:00
// descriptionLinkProcessor creates links for DescriptionHTML
func descriptionLinkProcessor ( ctx * postProcessCtx , node * html . Node ) {
2019-12-31 04:53:28 +03:00
m := common . LinkRegex . FindStringIndex ( node . Data )
2019-03-12 05:23:34 +03:00
if m == nil {
return
}
uri := node . Data [ m [ 0 ] : m [ 1 ] ]
replaceContent ( node , m [ 0 ] , m [ 1 ] , createDescriptionLink ( uri , uri ) )
}
// createDescriptionLink builds an <a> element pointing at href with
// target="_blank" and rel="noopener noreferrer", containing a single text
// node whose data is content. The returned node has no parent or siblings
// set.
func createDescriptionLink(href, content string) *html.Node {
	anchor := &html.Node{
		Type:     html.ElementNode,
		Data:     "a",
		DataAtom: atom.A,
		Attr: []html.Attribute{
			{Key: "href", Val: href},
			{Key: "target", Val: "_blank"},
			{Key: "rel", Val: "noopener noreferrer"},
		},
	}
	label := &html.Node{
		Type:   html.TextNode,
		Data:   content,
		Parent: anchor,
	}

	anchor.FirstChild = label
	anchor.LastChild = label
	return anchor
}