// 2024-11-07 11:57:07 +08:00
import { htmlEscape } from 'escape-goat' ;
/**
 * Lookup table from an element's upper-case `tagName` to the function that converts it.
 *
 * A processor may:
 * - return a string, which replaces the element in the tree as a text node;
 * - return a different element, which replaces the original element;
 * - return the same element or nothing, in which case the element stays where it is
 *   (typically after the processor rewrote its `textContent`).
 */
type Processors = {
  [tagName: string]: (el: HTMLElement) => string | HTMLElement | void;
};
/**
 * Mutable traversal state shared between the tree walker and the processors.
 * The walker updates it as it descends; processors read it when they run.
 */
type ProcessorContext = {
  /** Incremented while the walker is inside an `OL`/`UL`; `0` outside of any list. */
  listNestingLevel: number;
  /** Set by the walker to `true` for the first element child of the parent being walked. */
  elementIsFirst: boolean;
  /** Set by the walker to `true` for the last element child of the parent being walked. */
  elementIsLast: boolean;
};
/**
 * Builds the tag-name → processor table used to turn rendered HTML back into Markdown.
 *
 * A processor is invoked after the element's children have been converted, so it only has
 * to wrap or prefix the element's current `textContent`. Inline processors return a string
 * (the caller swaps the element for a text node); block processors rewrite `textContent`
 * in place and keep the element in the tree.
 *
 * @param ctx Shared traversal state. The returned processors close over it and read
 *   `listNestingLevel` and `elementIsLast` at call time, so the caller must keep it current
 *   while walking the tree.
 * @returns Processors keyed by upper-case tag name, including `UL` (same function as `OL`)
 *   and `H1`–`H6` (one shared heading function).
 */
function prepareProcessors(ctx: ProcessorContext): Processors {
  // Shared by H1–H6: the heading level is read from the tag name itself ("H3" -> 3).
  const heading = (el: HTMLElement): void => {
    const level = parseInt(el.tagName.slice(1), 10);
    el.textContent = `${'#'.repeat(level)} ${(el.textContent ?? '').trim()}`;
  };

  // Shared by OL and UL. The walker has already left this list when the processor runs,
  // so a non-zero nesting level means the list sits inside another list item and must
  // start on its own line.
  const list = (el: HTMLElement): void => {
    const preNewLine = ctx.listNestingLevel ? '\n' : '';
    el.textContent = `${preNewLine}${el.textContent ?? ''}\n`;
  };

  const processors: Processors = {
    H1: heading,
    STRONG(el: HTMLElement) {
      return `**${el.textContent ?? ''}**`;
    },
    EM(el: HTMLElement) {
      return `_${el.textContent ?? ''}_`;
    },
    DEL(el: HTMLElement) {
      return `~~${el.textContent ?? ''}~~`;
    },
    A(el: HTMLElement) {
      const text = el.textContent || 'link';
      const href = el.getAttribute('href');
      // An auto-linked URL (text identical to its target) stays a bare URL.
      if (/^https?:/.test(text) && text === href) {
        return text;
      }
      return href ? `[${text}](${href})` : text;
    },
    IMG(el: HTMLElement) {
      const alt = el.getAttribute('alt') || 'image';
      // A missing src is treated as empty instead of leaking the string "null".
      const src = el.getAttribute('src') ?? '';
      const width = el.getAttribute('width');
      const height = el.getAttribute('height');
      // Markdown image syntax cannot carry dimensions, so fall back to an HTML tag.
      if (width !== null || height !== null) {
        const widthAttr = width === null ? '' : ` width="${htmlEscape(width)}"`;
        const heightAttr = height === null ? '' : ` height="${htmlEscape(height)}"`;
        return `<img alt="${htmlEscape(alt)}"${widthAttr}${heightAttr} src="${htmlEscape(src)}">`;
      }
      return `![${alt}](${src})`;
    },
    P(el: HTMLElement) {
      el.textContent = `${el.textContent ?? ''}\n`;
    },
    BLOCKQUOTE(el: HTMLElement) {
      // The multiline flag makes `^` match at every line start, so each line gets a "> ".
      el.textContent = `${(el.textContent ?? '').replace(/^/mg, '> ')}\n`;
    },
    OL: list,
    UL: list,
    LI(el: HTMLElement) {
      const parent = el.parentNode as HTMLElement | null;
      const bullet = parent?.tagName === 'OL' ? '1. ' : '* ';
      // Top-level items (nesting level 1) are not indented; each deeper level adds 4 spaces.
      const nestingIdentLevel = Math.max(0, ctx.listNestingLevel - 1);
      const indent = ' '.repeat(nestingIdentLevel * 4);
      const lineEnd = ctx.elementIsLast ? '' : '\n';
      el.textContent = `${indent}${bullet}${el.textContent ?? ''}${lineEnd}`;
      // Returning the element keeps it in the tree so the parent list can still see it.
      return el;
    },
    INPUT(el: HTMLElement) {
      return (el as HTMLInputElement).checked ? '[x] ' : '[ ] ';
    },
    CODE(el: HTMLElement) {
      const text = el.textContent ?? '';
      const parent = el.parentNode as HTMLElement | null;
      if (parent?.tagName === 'PRE') {
        // <pre><code> is a fenced block; keep the element so the PRE wrapper stays intact.
        el.textContent = `\`\`\`\n${text}\n\`\`\`\n`;
        return el;
      }
      if (text.includes('`')) {
        // Double-backtick span, space-padded so a leading/trailing backtick in the text
        // cannot merge with the delimiters.
        return `\`\` ${text} \`\``;
      }
      return `\`${text}\``;
    },
  };

  for (let level = 2; level <= 6; level++) {
    processors[`H${level}`] = heading;
  }
  return processors;
}
// 2024-11-11 12:13:57 +01:00
/**
 * Converts `el` and its descendants to Markdown in place, children first.
 *
 * After all element children have been processed, the processor registered for
 * `el.tagName` (if any) is called. When it returns a string or a different element, `el`
 * is replaced by it in the tree; otherwise `el` stays where it is.
 *
 * Special cases:
 * - an element carrying `data-markdown-generated-content` is left untouched;
 * - a link whose only element child is an image is reduced to converting that image,
 *   the link itself is not processed.
 *
 * @param ctx Traversal state. `listNestingLevel` is raised while inside an `OL`/`UL`, and
 *   `elementIsFirst`/`elementIsLast` describe the position of the element whose processor
 *   is currently running.
 * @param processors Tag-name → processor table.
 * @param el Element to convert; it must have a parent if its processor may replace it.
 * @returns The element's text for generated content, otherwise nothing. The conversion
 *   result lives in the mutated tree.
 */
function processElement(ctx: ProcessorContext, processors: Processors, el: HTMLElement): string | void {
  if (el.hasAttribute('data-markdown-generated-content')) return el.textContent ?? '';
  if (el.tagName === 'A' && el.children.length === 1 && el.children[0].tagName === 'IMG') {
    return processElement(ctx, processors, el.children[0] as HTMLElement);
  }

  // The caller set these flags for `el`; walking the children overwrites them, so keep
  // a copy to hand back to `el`'s own processor afterwards.
  const ownIsFirst = ctx.elementIsFirst;
  const ownIsLast = ctx.elementIsLast;

  const isListContainer = el.tagName === 'OL' || el.tagName === 'UL';
  if (isListContainer) ctx.listNestingLevel++;

  // `el.children` is a live collection and processed children may be replaced by text
  // nodes; iterating a snapshot ensures no sibling is skipped.
  const children = Array.from(el.children) as HTMLElement[];
  children.forEach((child, index) => {
    ctx.elementIsFirst = index === 0;
    ctx.elementIsLast = index === children.length - 1;
    processElement(ctx, processors, child);
  });

  if (isListContainer) ctx.listNestingLevel--;
  ctx.elementIsFirst = ownIsFirst;
  ctx.elementIsLast = ownIsLast;

  const processor = processors[el.tagName];
  if (!processor) return;

  const ret = processor(el);
  if (ret && ret !== el) {
    el.replaceWith(typeof ret === 'string' ? document.createTextNode(ret) : ret);
  }
}
/**
 * Converts an HTML element tree to Markdown text.
 *
 * Note that the conversion is destructive: `el` is moved into a freshly created, detached
 * `<div>` (so it leaves its previous parent, if any) and its subtree is rewritten in place
 * while being processed.
 *
 * @param el Root element to convert.
 * @returns The text content of the wrapper after conversion.
 */
export function convertHtmlToMarkdown(el: HTMLElement): string {
  const context = {listNestingLevel: 0} as ProcessorContext;

  // The wrapper gives the root a parent, so the root itself can be replaced by text.
  const wrapper = document.createElement('div');
  wrapper.append(el);

  processElement(context, prepareProcessors(context), el);
  return wrapper.textContent;
}