2020-09-11 17:48:39 +03:00
// Copyright 2020 The Gitea Authors. All rights reserved.
2022-11-27 21:20:29 +03:00
// SPDX-License-Identifier: MIT
2020-09-11 17:48:39 +03:00
package markdown
import (
2022-09-13 19:33:37 +03:00
"bytes"
2020-09-11 17:48:39 +03:00
"errors"
2022-09-13 19:33:37 +03:00
"unicode"
"unicode/utf8"
2020-09-11 17:48:39 +03:00
2022-09-13 19:33:37 +03:00
"gopkg.in/yaml.v3"
2020-09-11 17:48:39 +03:00
)
2022-09-13 19:33:37 +03:00
// isYAMLSeparator reports whether line is a YAML frontmatter separator:
// optional leading whitespace, three or more consecutive '-' characters, and
// optional trailing whitespace with nothing else on the line.
//
// Whitespace here means the ASCII space ' ' or any non-ASCII rune for which
// unicode.IsSpace is true. Other ASCII characters, including '\t', '\r' and
// '\n', are not treated as whitespace and make the line not match.
func isYAMLSeparator(line []byte) bool {
	idx := skipYAMLSeparatorSpaces(line, 0)

	// Count the run of dashes that follows the leading whitespace.
	dashStart := idx
	for idx < len(line) && line[idx] == '-' {
		idx++
	}
	if idx-dashStart < 3 {
		return false
	}

	// Only whitespace may follow the dashes.
	return skipYAMLSeparatorSpaces(line, idx) == len(line)
}

// skipYAMLSeparatorSpaces returns the index of the first byte at or after idx
// that does not start a whitespace character (ASCII ' ' or a non-ASCII rune
// accepted by unicode.IsSpace). It returns len(line) when only whitespace
// remains.
func skipYAMLSeparatorSpaces(line []byte, idx int) int {
	for idx < len(line) {
		if line[idx] == ' ' {
			idx++
			continue
		}
		if line[idx] < utf8.RuneSelf {
			// Any other ASCII byte ends the whitespace run.
			return idx
		}
		r, sz := utf8.DecodeRune(line[idx:])
		if !unicode.IsSpace(r) {
			// Also reached for invalid UTF-8, which decodes to utf8.RuneError.
			return idx
		}
		// Advance by exactly the rune's width so that the byte following a
		// multi-byte whitespace rune is still examined.
		idx += sz
	}
	return idx
}
// ExtractMetadata consumes a markdown file, parses YAML frontmatter,
// and returns the frontmatter metadata separated from the markdown content
2023-07-04 21:36:08 +03:00
func ExtractMetadata ( contents string , out any ) ( string , error ) {
2022-09-13 19:33:37 +03:00
body , err := ExtractMetadataBytes ( [ ] byte ( contents ) , out )
return string ( body ) , err
}
// ExtractMetadata consumes a markdown file, parses YAML frontmatter,
// and returns the frontmatter metadata separated from the markdown content
2023-07-04 21:36:08 +03:00
func ExtractMetadataBytes ( contents [ ] byte , out any ) ( [ ] byte , error ) {
2022-09-13 19:33:37 +03:00
var front , body [ ] byte
start , end := 0 , len ( contents )
idx := bytes . IndexByte ( contents [ start : ] , '\n' )
if idx >= 0 {
end = start + idx
}
line := contents [ start : end ]
if ! isYAMLSeparator ( line ) {
return contents , errors . New ( "frontmatter must start with a separator line" )
}
frontMatterStart := end + 1
for start = frontMatterStart ; start < len ( contents ) ; start = end + 1 {
end = len ( contents )
idx := bytes . IndexByte ( contents [ start : ] , '\n' )
if idx >= 0 {
end = start + idx
2020-09-11 17:48:39 +03:00
}
2022-09-13 19:33:37 +03:00
line := contents [ start : end ]
2020-09-11 17:48:39 +03:00
if isYAMLSeparator ( line ) {
2022-09-13 19:33:37 +03:00
front = contents [ frontMatterStart : start ]
2022-10-05 21:55:36 +03:00
if end + 1 < len ( contents ) {
body = contents [ end + 1 : ]
}
2020-09-13 04:48:47 +03:00
break
2020-09-11 17:48:39 +03:00
}
}
2020-09-13 04:48:47 +03:00
if len ( front ) == 0 {
2022-09-13 19:33:37 +03:00
return contents , errors . New ( "could not determine metadata" )
2020-09-11 17:48:39 +03:00
}
2022-09-13 19:33:37 +03:00
if err := yaml . Unmarshal ( front , out ) ; err != nil {
return contents , err
2020-09-11 17:48:39 +03:00
}
2022-09-13 19:33:37 +03:00
return body , nil
2020-09-11 17:48:39 +03:00
}