2020-04-28 21:05:39 +03:00
// Copyright 2020 The Gitea Authors. All rights reserved.
// Copyright 2015 Kenneth Shaw
2022-11-27 21:20:29 +03:00
// SPDX-License-Identifier: MIT
2020-04-28 21:05:39 +03:00
package emoji
import (
2021-06-17 13:35:05 +03:00
"io"
2020-05-29 19:08:36 +03:00
"sort"
2020-04-28 21:05:39 +03:00
"strings"
"sync"
)
// Gemoji is a set of emoji data.
type Gemoji [ ] Emoji
// Emoji represents a single emoji and associated data.
type Emoji struct {
Emoji string
Description string
Aliases [ ] string
UnicodeVersion string
2020-06-02 09:22:40 +03:00
SkinTones bool
2020-04-28 21:05:39 +03:00
}
var (
// codeMap provides a map of the emoji unicode code to its emoji data.
codeMap map [ string ] int
// aliasMap provides a map of the alias to its emoji data.
aliasMap map [ string ] int
2021-03-16 02:20:05 +03:00
// emptyReplacer is the string replacer for emoji codes.
emptyReplacer * strings . Replacer
2020-04-28 21:05:39 +03:00
// codeReplacer is the string replacer for emoji codes.
codeReplacer * strings . Replacer
// aliasReplacer is the string replacer for emoji aliases.
aliasReplacer * strings . Replacer
once sync . Once
)
func loadMap ( ) {
once . Do ( func ( ) {
// initialize
codeMap = make ( map [ string ] int , len ( GemojiData ) )
aliasMap = make ( map [ string ] int , len ( GemojiData ) )
// process emoji codes and aliases
codePairs := make ( [ ] string , 0 )
2021-03-16 02:20:05 +03:00
emptyPairs := make ( [ ] string , 0 )
2020-04-28 21:05:39 +03:00
aliasPairs := make ( [ ] string , 0 )
2020-05-29 19:08:36 +03:00
// sort from largest to small so we match combined emoji first
sort . Slice ( GemojiData , func ( i , j int ) bool {
return len ( GemojiData [ i ] . Emoji ) > len ( GemojiData [ j ] . Emoji )
} )
2020-04-28 21:05:39 +03:00
for i , e := range GemojiData {
if e . Emoji == "" || len ( e . Aliases ) == 0 {
continue
}
// setup codes
codeMap [ e . Emoji ] = i
codePairs = append ( codePairs , e . Emoji , ":" + e . Aliases [ 0 ] + ":" )
2021-03-16 02:20:05 +03:00
emptyPairs = append ( emptyPairs , e . Emoji , e . Emoji )
2020-04-28 21:05:39 +03:00
// setup aliases
for _ , a := range e . Aliases {
if a == "" {
continue
}
aliasMap [ a ] = i
aliasPairs = append ( aliasPairs , ":" + a + ":" , e . Emoji )
}
}
// create replacers
2021-03-16 02:20:05 +03:00
emptyReplacer = strings . NewReplacer ( emptyPairs ... )
2020-04-28 21:05:39 +03:00
codeReplacer = strings . NewReplacer ( codePairs ... )
aliasReplacer = strings . NewReplacer ( aliasPairs ... )
} )
}
// FromCode retrieves the emoji data based on the provided unicode code (ie,
// "\u2618" will return the Gemoji data for "shamrock").
func FromCode ( code string ) * Emoji {
loadMap ( )
i , ok := codeMap [ code ]
if ! ok {
return nil
}
return & GemojiData [ i ]
}
// FromAlias retrieves the emoji data based on the provided alias in the form
// "alias" or ":alias:" (ie, "shamrock" or ":shamrock:" will return the Gemoji
// data for "shamrock").
func FromAlias ( alias string ) * Emoji {
loadMap ( )
if strings . HasPrefix ( alias , ":" ) && strings . HasSuffix ( alias , ":" ) {
alias = alias [ 1 : len ( alias ) - 1 ]
}
i , ok := aliasMap [ alias ]
if ! ok {
return nil
}
return & GemojiData [ i ]
}
// ReplaceCodes replaces all emoji codes with the first corresponding emoji
// alias (in the form of ":alias:") (ie, "\u2618" will be converted to
// ":shamrock:").
func ReplaceCodes ( s string ) string {
loadMap ( )
return codeReplacer . Replace ( s )
}
// ReplaceAliases replaces all aliases of the form ":alias:" with its
// corresponding unicode value.
func ReplaceAliases ( s string ) string {
loadMap ( )
return aliasReplacer . Replace ( s )
}
2020-05-29 19:08:36 +03:00
2021-03-16 02:20:05 +03:00
type rememberSecondWriteWriter struct {
pos int
idx int
end int
writecount int
}
2020-05-29 19:08:36 +03:00
2021-03-16 02:20:05 +03:00
func ( n * rememberSecondWriteWriter ) Write ( p [ ] byte ) ( int , error ) {
n . writecount ++
if n . writecount == 2 {
n . idx = n . pos
n . end = n . pos + len ( p )
2021-06-17 13:35:05 +03:00
n . pos += len ( p )
return len ( p ) , io . EOF
2020-05-29 19:08:36 +03:00
}
2021-03-16 02:20:05 +03:00
n . pos += len ( p )
return len ( p ) , nil
}
2020-06-02 09:22:40 +03:00
2021-03-16 02:20:05 +03:00
func ( n * rememberSecondWriteWriter ) WriteString ( s string ) ( int , error ) {
n . writecount ++
if n . writecount == 2 {
n . idx = n . pos
n . end = n . pos + len ( s )
2021-06-17 13:35:05 +03:00
n . pos += len ( s )
return len ( s ) , io . EOF
2020-05-29 19:08:36 +03:00
}
2021-03-16 02:20:05 +03:00
n . pos += len ( s )
return len ( s ) , nil
}
2020-07-25 16:40:04 +03:00
2021-03-16 02:20:05 +03:00
// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
func FindEmojiSubmatchIndex ( s string ) [ ] int {
loadMap ( )
secondWriteWriter := rememberSecondWriteWriter { }
// A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
// we can be lazy here.
//
// The implementation of strings.Replacer.WriteString is such that the first index of the emoji
// submatch is simply the second thing that is written to WriteString in the writer.
//
// Therefore we can simply take the index of the second write as our first emoji
//
// FIXME: just copy the trie implementation from strings.NewReplacer
_ , _ = emptyReplacer . WriteString ( & secondWriteWriter , s )
// if we wrote less than twice then we never "replaced"
if secondWriteWriter . writecount < 2 {
return nil
2020-07-25 16:40:04 +03:00
}
2021-03-16 02:20:05 +03:00
return [ ] int { secondWriteWriter . idx , secondWriteWriter . end }
2020-05-29 19:08:36 +03:00
}