2017-04-13 05:52:24 +03:00
// Copyright 2017 The Gitea Authors. All rights reserved.
// Copyright 2017 The Gogs Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
2017-09-16 20:17:57 +03:00
package markup
2017-04-13 05:52:24 +03:00
import (
2019-12-31 04:53:28 +03:00
"bytes"
"io"
2017-04-13 05:52:24 +03:00
"regexp"
"sync"
"code.gitea.io/gitea/modules/setting"
"github.com/microcosm-cc/bluemonday"
)
// Sanitizer bundles a *bluemonday.Policy with the sync.Once used to
// initialize it lazily. Both fields are unexported, so code outside this
// package cannot reach or modify the underlying policy directly.
type Sanitizer struct {
	policy *bluemonday.Policy
	init   sync.Once
}
// sanitizer is the single package-level Sanitizer instance. It starts out
// with a nil policy; NewSanitizer / ReplaceSanitizer fill it in.
var sanitizer = new(Sanitizer)
// NewSanitizer initializes sanitizer with allowed attributes based on settings.
// Multiple calls to this function will only create one instance of Sanitizer during
// entire application lifecycle.
func NewSanitizer ( ) {
sanitizer . init . Do ( func ( ) {
2019-10-15 04:31:09 +03:00
ReplaceSanitizer ( )
} )
}
2017-04-13 05:52:24 +03:00
2019-10-15 04:31:09 +03:00
// ReplaceSanitizer replaces the current sanitizer to account for changes in settings
func ReplaceSanitizer ( ) {
sanitizer . policy = bluemonday . UGCPolicy ( )
// We only want to allow HighlightJS specific classes for code blocks
2020-01-20 00:36:00 +03:00
sanitizer . policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^language-[\w-]+$ ` ) ) . OnElements ( "code" )
2017-04-13 05:52:24 +03:00
2019-10-15 04:31:09 +03:00
// Checkboxes
sanitizer . policy . AllowAttrs ( "type" ) . Matching ( regexp . MustCompile ( ` ^checkbox$ ` ) ) . OnElements ( "input" )
sanitizer . policy . AllowAttrs ( "checked" , "disabled" ) . OnElements ( "input" )
2019-10-14 01:29:10 +03:00
2019-10-15 04:31:09 +03:00
// Custom URL-Schemes
sanitizer . policy . AllowURLSchemes ( setting . Markdown . CustomURLSchemes ... )
// Allow keyword markup
sanitizer . policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^ ` + keywordClass + ` $ ` ) ) . OnElements ( "span" )
2019-12-03 22:02:41 +03:00
// Allow <kbd> tags for keyboard shortcut styling
sanitizer . policy . AllowElements ( "kbd" )
2019-12-07 22:49:04 +03:00
// Custom keyword markup
for _ , rule := range setting . ExternalSanitizerRules {
if rule . Regexp != nil {
sanitizer . policy . AllowAttrs ( rule . AllowAttr ) . Matching ( rule . Regexp ) . OnElements ( rule . Element )
} else {
sanitizer . policy . AllowAttrs ( rule . AllowAttr ) . OnElements ( rule . Element )
}
}
2017-04-13 05:52:24 +03:00
}
// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
func Sanitize ( s string ) string {
2017-04-19 14:16:36 +03:00
NewSanitizer ( )
2017-04-13 05:52:24 +03:00
return sanitizer . policy . Sanitize ( s )
}
2019-12-31 04:53:28 +03:00
// SanitizeReader runs the content read from r through the sanitizer policy
// and returns the result as a *bytes.Buffer. The policy is initialized on
// first use via NewSanitizer.
func SanitizeReader(r io.Reader) *bytes.Buffer {
	NewSanitizer()
	policy := sanitizer.policy
	return policy.SanitizeReader(r)
}
2017-04-13 05:52:24 +03:00
// SanitizeBytes takes a []byte slice that contains a HTML fragment or document and applies policy whitelist.
func SanitizeBytes ( b [ ] byte ) [ ] byte {
if len ( b ) == 0 {
// nothing to sanitize
return b
}
2017-04-19 14:16:36 +03:00
NewSanitizer ( )
2017-04-13 05:52:24 +03:00
return sanitizer . policy . SanitizeBytes ( b )
}