2019-10-14 01:29:10 +03:00
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package mdstripper
import (
"bytes"
2020-08-06 22:20:05 +03:00
"net/url"
"strings"
2019-12-31 04:53:28 +03:00
"sync"
2019-10-14 01:29:10 +03:00
2019-12-31 04:53:28 +03:00
"io"
2019-10-14 01:29:10 +03:00
2019-12-31 04:53:28 +03:00
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup/common"
2020-08-06 22:20:05 +03:00
"code.gitea.io/gitea/modules/setting"
2019-10-14 01:29:10 +03:00
2019-12-31 04:53:28 +03:00
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text"
2019-10-14 01:29:10 +03:00
)
2020-08-06 22:20:05 +03:00
// Lazily initialised base URL of this instance; see getGiteaHost.
var (
// giteaHostInit guards the one-time initialisation of giteaHost.
giteaHostInit sync . Once
// giteaHost caches the URL parsed from setting.AppURL, or an empty URL
// when that value cannot be parsed.
giteaHost * url . URL
)
2019-12-31 04:53:28 +03:00
// stripRenderer walks a parsed markdown document, writing its plain text to
// the output and collecting link targets separately.
type stripRenderer struct {
	// localhost is the base URL that auto-links are compared against (host
	// and path prefix) to recognise issue and pull request links.
	localhost *url.URL
	// links accumulates the link targets that are handled out of band
	// instead of being written to the output.
	links []string
	// empty is true until the first text has been written; while it is set
	// no separating newline is emitted.
	empty bool
}
2019-12-31 04:53:28 +03:00
func ( r * stripRenderer ) Render ( w io . Writer , source [ ] byte , doc ast . Node ) error {
return ast . Walk ( doc , func ( n ast . Node , entering bool ) ( ast . WalkStatus , error ) {
if ! entering {
return ast . WalkContinue , nil
}
switch v := n . ( type ) {
case * ast . Text :
if ! v . IsRaw ( ) {
_ , prevSibIsText := n . PreviousSibling ( ) . ( * ast . Text )
coalesce := prevSibIsText
r . processString (
w ,
v . Text ( source ) ,
coalesce )
if v . SoftLineBreak ( ) {
r . doubleSpace ( w )
}
}
return ast . WalkContinue , nil
case * ast . Link :
r . processLink ( w , v . Destination )
return ast . WalkSkipChildren , nil
case * ast . AutoLink :
2020-08-06 22:20:05 +03:00
// This could be a reference to an issue or pull - if so convert it
r . processAutoLink ( w , v . URL ( source ) )
2019-12-31 04:53:28 +03:00
return ast . WalkSkipChildren , nil
}
return ast . WalkContinue , nil
2019-10-31 04:06:25 +03:00
} )
2019-10-14 01:29:10 +03:00
}
2019-12-31 04:53:28 +03:00
func ( r * stripRenderer ) doubleSpace ( w io . Writer ) {
2019-10-31 04:06:25 +03:00
if ! r . empty {
_ , _ = w . Write ( [ ] byte { '\n' } )
2019-10-14 01:29:10 +03:00
}
}
2019-12-31 04:53:28 +03:00
func ( r * stripRenderer ) processString ( w io . Writer , text [ ] byte , coalesce bool ) {
2019-10-14 01:29:10 +03:00
// Always break-up words
2019-12-31 04:53:28 +03:00
if ! coalesce {
2019-10-31 04:06:25 +03:00
r . doubleSpace ( w )
2019-10-14 01:29:10 +03:00
}
2019-10-31 04:06:25 +03:00
_ , _ = w . Write ( text )
r . empty = false
2019-10-14 01:29:10 +03:00
}
2019-10-31 04:06:25 +03:00
2020-08-06 22:20:05 +03:00
// processAutoLink detects auto-links that point at an issue or pull request
// below r.localhost and writes them to w as a short reference:
// "user/repo#3" for issues and "user/repo!3" for pulls.
//
// Every other link (unparsable, foreign host, different path shape) is
// appended to r.links to be processed out of band, and nothing is written.
func (r *stripRenderer) processAutoLink(w io.Writer, link []byte) {
	linkStr := string(link)
	u, err := url.Parse(linkStr)
	if err != nil {
		// Process out of band
		r.links = append(r.links, linkStr)
		return
	}

	// Note: we're not attempting to match the URL scheme (http/https)
	host := strings.ToLower(u.Host)
	if host != "" && host != strings.ToLower(r.localhost.Host) {
		// Process out of band
		r.links = append(r.links, linkStr)
		return
	}

	// We want: /user/repo/issues/3
	// The base path may end in "/" (it is "/" for a base such as
	// https://host/). Drop that slash before removing the prefix, otherwise
	// the remainder loses its leading "/" and can never split into the five
	// parts expected below.
	basePath := strings.TrimSuffix(r.localhost.EscapedPath(), "/")
	parts := strings.Split(strings.TrimPrefix(u.EscapedPath(), basePath), "/")
	if len(parts) != 5 || parts[0] != "" {
		// Process out of band
		r.links = append(r.links, linkStr)
		return
	}

	owner, repo, kind, index := parts[1], parts[2], parts[3], parts[4]
	if owner == "" || repo == "" || index == "" {
		// e.g. /user/repo/issues/ (a list page): not a reference to a
		// single issue, process out of band
		r.links = append(r.links, linkStr)
		return
	}

	var sep string
	switch kind {
	case "issues":
		sep = "#"
	case "pulls":
		sep = "!"
	default:
		// Process out of band
		r.links = append(r.links, linkStr)
		return
	}

	// The write error is intentionally discarded, as everywhere in this renderer.
	_, _ = w.Write([]byte(owner + "/" + repo + sep + index))
	// Output has been produced, so following text must be separated from it.
	r.empty = false
}
2019-12-31 04:53:28 +03:00
func ( r * stripRenderer ) processLink ( w io . Writer , link [ ] byte ) {
2019-10-14 01:29:10 +03:00
// Links are processed out of band
r . links = append ( r . links , string ( link ) )
}
// GetLinks returns the list of link data collected while parsing
2019-12-31 04:53:28 +03:00
func ( r * stripRenderer ) GetLinks ( ) [ ] string {
2019-10-14 01:29:10 +03:00
return r . links
}
2019-12-31 04:53:28 +03:00
// AddOptions accepts renderer options and ignores them: this renderer has
// nothing to configure, so the method is a no-op.
func ( r * stripRenderer ) AddOptions ( ... renderer . Option ) {
// no-op
}
// StripMarkdown is the string-returning variant of StripMarkdownBytes: it
// returns the stripped content converted to a string, together with the
// links reported by StripMarkdownBytes.
func StripMarkdown(rawBytes []byte) (string, []string) {
	stripped, found := StripMarkdownBytes(rawBytes)
	return string(stripped), found
}
var (
	// stripParser is the parser shared by all StripMarkdownBytes calls.
	stripParser parser.Parser
	// once guards the one-time construction of stripParser.
	once sync.Once
)
// StripMarkdownBytes parses markdown content by removing all markup and code blocks
// in order to extract links and other references
func StripMarkdownBytes ( rawBytes [ ] byte ) ( [ ] byte , [ ] string ) {
once . Do ( func ( ) {
gdMarkdown := goldmark . New (
goldmark . WithExtensions ( extension . Table ,
extension . Strikethrough ,
extension . TaskList ,
extension . DefinitionList ,
common . FootnoteExtension ,
common . Linkify ,
) ,
goldmark . WithParserOptions (
parser . WithAttribute ( ) ,
parser . WithAutoHeadingID ( ) ,
) ,
goldmark . WithRendererOptions (
html . WithUnsafe ( ) ,
) ,
)
stripParser = gdMarkdown . Parser ( )
} )
stripper := & stripRenderer {
2020-08-06 22:20:05 +03:00
localhost : getGiteaHost ( ) ,
links : make ( [ ] string , 0 , 10 ) ,
empty : true ,
2019-12-31 04:53:28 +03:00
}
reader := text . NewReader ( rawBytes )
doc := stripParser . Parse ( reader )
var buf bytes . Buffer
if err := stripper . Render ( & buf , rawBytes , doc ) ; err != nil {
log . Error ( "Unable to strip: %v" , err )
}
return buf . Bytes ( ) , stripper . GetLinks ( )
}
2020-08-06 22:20:05 +03:00
// getGiteaHost returns the URL parsed from setting.AppURL. The value is
// parsed on the first call only and the same pointer is returned afterwards;
// if parsing fails, an empty URL is stored instead.
func getGiteaHost() *url.URL {
	giteaHostInit.Do(func() {
		parsed, err := url.Parse(setting.AppURL)
		if err != nil {
			parsed = &url.URL{}
		}
		giteaHost = parsed
	})
	return giteaHost
}