2001-02-23 20:55:21 +03:00
/*
2001-12-31 19:16:02 +03:00
* HTMLtree . c : implementation of access function for an HTML tree .
2001-02-23 20:55:21 +03:00
*
* See Copyright for the status of this software .
*
2001-06-24 16:13:24 +04:00
* daniel @ veillard . com
2001-02-23 20:55:21 +03:00
*/
2002-03-18 22:37:11 +03:00
# define IN_LIBXML
2001-04-21 20:57:29 +04:00
# include "libxml.h"
2001-02-23 20:55:21 +03:00
# ifdef LIBXML_HTML_ENABLED
2002-11-27 22:39:27 +03:00
# include <string.h> /* for memset() only ! */
2001-02-23 20:55:21 +03:00
# include <ctype.h>
# include <stdlib.h>
# include <libxml/xmlmemory.h>
# include <libxml/HTMLparser.h>
# include <libxml/HTMLtree.h>
# include <libxml/entities.h>
# include <libxml/valid.h>
# include <libxml/xmlerror.h>
# include <libxml/parserInternals.h>
2001-10-17 19:58:35 +04:00
# include <libxml/globals.h>
2002-04-15 02:00:22 +04:00
# include <libxml/uri.h>
2001-02-23 20:55:21 +03:00
2012-07-16 10:58:02 +04:00
# include "buf.h"
2001-02-23 20:55:21 +03:00
/************************************************************************
* *
2012-09-11 09:26:36 +04:00
* Getting / Setting encoding meta tags *
2001-02-23 20:55:21 +03:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* htmlGetMetaEncoding :
* @ doc : the document
2012-09-11 09:26:36 +04:00
*
2001-02-23 20:55:21 +03:00
* Encoding definition lookup in the Meta tags
*
* Returns the current encoding as flagged in the HTML source
*/
const xmlChar *
htmlGetMetaEncoding ( htmlDocPtr doc ) {
/*
 * Walks doc->children looking for an html, head or meta element, then
 * scans meta elements for one carrying both an http-equiv attribute whose
 * value matches Content-Type and a content attribute. The returned pointer
 * points inside that content attribute's text (it is not a copy), so the
 * caller must not free it.
 */
htmlNodePtr cur ;
const xmlChar * content ;
const xmlChar * encoding ;
if ( doc = = NULL )
return ( NULL ) ;
cur = doc - > children ;
/*
* Search the html element among the document's direct children. A head
* or meta element found at this level short-circuits the later searches.
*/
while ( cur ! = NULL ) {
2001-10-23 17:10:19 +04:00
if ( ( cur - > type = = XML_ELEMENT_NODE ) & & ( cur - > name ! = NULL ) ) {
2001-02-23 20:55:21 +03:00
if ( xmlStrEqual ( cur - > name , BAD_CAST " html " ) )
break ;
if ( xmlStrEqual ( cur - > name , BAD_CAST " head " ) )
goto found_head ;
if ( xmlStrEqual ( cur - > name , BAD_CAST " meta " ) )
goto found_meta ;
}
cur = cur - > next ;
}
if ( cur = = NULL )
return ( NULL ) ;
cur = cur - > children ;
/*
* Search the head element among the children of html. A meta element
* found here (outside any head) is accepted as well.
*/
while ( cur ! = NULL ) {
2001-10-23 17:10:19 +04:00
if ( ( cur - > type = = XML_ELEMENT_NODE ) & & ( cur - > name ! = NULL ) ) {
2001-02-23 20:55:21 +03:00
if ( xmlStrEqual ( cur - > name , BAD_CAST " head " ) )
break ;
if ( xmlStrEqual ( cur - > name , BAD_CAST " meta " ) )
goto found_meta ;
}
cur = cur - > next ;
}
if ( cur = = NULL )
return ( NULL ) ;
found_head :
cur = cur - > children ;
/*
* Search the meta elements, starting at cur and following its siblings.
*/
found_meta :
while ( cur ! = NULL ) {
2001-10-23 17:10:19 +04:00
if ( ( cur - > type = = XML_ELEMENT_NODE ) & & ( cur - > name ! = NULL ) ) {
2001-02-23 20:55:21 +03:00
if ( xmlStrEqual ( cur - > name , BAD_CAST " meta " ) ) {
xmlAttrPtr attr = cur - > properties ;
int http ;
const xmlChar * value ;
/* Both flags are reset for every meta element examined. */
content = NULL ;
http = 0 ;
while ( attr ! = NULL ) {
/* Only attributes whose value is a single text node are considered. */
if ( ( attr - > children ! = NULL ) & &
( attr - > children - > type = = XML_TEXT_NODE ) & &
( attr - > children - > next = = NULL ) ) {
value = attr - > children - > content ;
if ( ( ! xmlStrcasecmp ( attr - > name , BAD_CAST " http-equiv " ) )
& & ( ! xmlStrcasecmp ( value , BAD_CAST " Content-Type " ) ) )
http = 1 ;
else if ( ( value ! = NULL )
& & ( ! xmlStrcasecmp ( attr - > name , BAD_CAST " content " ) ) )
content = value ;
if ( ( http ! = 0 ) & & ( content ! = NULL ) )
goto found_content ;
}
attr = attr - > next ;
}
}
}
cur = cur - > next ;
}
return ( NULL ) ;
found_content :
/*
 * Locate the charset parameter. Only three spellings of the keyword are
 * tried (all lower case, capitalised, all upper case), first immediately
 * followed by '=' and then with one space before '='. The offsets 8 and 9
 * added below skip past the matched keyword and '='.
 */
encoding = xmlStrstr ( content , BAD_CAST " charset= " ) ;
2012-09-11 09:26:36 +04:00
if ( encoding = = NULL )
2001-02-23 20:55:21 +03:00
encoding = xmlStrstr ( content , BAD_CAST " Charset= " ) ;
2012-09-11 09:26:36 +04:00
if ( encoding = = NULL )
2001-02-23 20:55:21 +03:00
encoding = xmlStrstr ( content , BAD_CAST " CHARSET= " ) ;
if ( encoding ! = NULL ) {
encoding + = 8 ;
} else {
encoding = xmlStrstr ( content , BAD_CAST " charset = " ) ;
2012-09-11 09:26:36 +04:00
if ( encoding = = NULL )
2001-02-23 20:55:21 +03:00
encoding = xmlStrstr ( content , BAD_CAST " Charset = " ) ;
2012-09-11 09:26:36 +04:00
if ( encoding = = NULL )
2001-02-23 20:55:21 +03:00
encoding = xmlStrstr ( content , BAD_CAST " CHARSET = " ) ;
if ( encoding ! = NULL )
encoding + = 9 ;
}
if ( encoding ! = NULL ) {
/* Skip blanks and tabs between '=' and the encoding name. */
while ( ( * encoding = = ' ' ) | | ( * encoding = = ' \t ' ) ) encoding + + ;
}
/* NULL here means a matching meta was found but it had no charset. */
return ( encoding ) ;
}
/**
 * htmlSetMetaEncoding:
 * @doc:  the document
 * @encoding:  the encoding string, or NULL to remove the existing
 *             Content-Type META element
 *
 * Sets the current encoding in the Meta tags
 * NOTE: this will not change the document content encoding, just
 * the META flag associated.
 *
 * The first META element carrying both http-equiv="Content-Type" and a
 * content attribute is updated (or removed when @encoding is NULL). If
 * none exists and a HEAD element was located, a new META element is
 * inserted as the first child of HEAD.
 *
 * Returns 0 in case of success and -1 in case of error (NULL document,
 * the pseudo encoding "html", no html/head/meta element found, or a
 * failure while creating the new META element)
 */
int
htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
    htmlNodePtr cur, meta = NULL, head = NULL;
    const xmlChar *content = NULL;
    char newcontent[100];

    newcontent[0] = 0;

    if (doc == NULL)
        return(-1);

    /* html isn't a real encoding it's just libxml2 way to get entities */
    if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
        return(-1);

    if (encoding != NULL) {
        /* Over-long encoding names are truncated to fit newcontent. */
        snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
                 (char *) encoding);
        newcontent[sizeof(newcontent) - 1] = 0;
    }

    cur = doc->children;

    /*
     * Search the html
     */
    while (cur != NULL) {
        if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
            if (xmlStrcasecmp(cur->name, BAD_CAST "html") == 0)
                break;
            if (xmlStrcasecmp(cur->name, BAD_CAST "head") == 0)
                goto found_head;
            if (xmlStrcasecmp(cur->name, BAD_CAST "meta") == 0)
                goto found_meta;
        }
        cur = cur->next;
    }
    if (cur == NULL)
        return(-1);
    cur = cur->children;

    /*
     * Search the head
     */
    while (cur != NULL) {
        if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
            if (xmlStrcasecmp(cur->name, BAD_CAST "head") == 0)
                break;
            if (xmlStrcasecmp(cur->name, BAD_CAST "meta") == 0) {
                /* meta outside of head: new elements go into its parent */
                head = cur->parent;
                goto found_meta;
            }
        }
        cur = cur->next;
    }
    if (cur == NULL)
        return(-1);

found_head:
    head = cur;
    if (cur->children == NULL)
        goto create;
    cur = cur->children;

found_meta:
    /*
     * Search the remaining meta elements for the first one carrying
     * encoding information
     */
    while (cur != NULL) {
        if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
            if (xmlStrcasecmp(cur->name, BAD_CAST "meta") == 0) {
                xmlAttrPtr attr = cur->properties;
                int http;
                const xmlChar *value;

                content = NULL;
                http = 0;
                while (attr != NULL) {
                    /* only single-text-node attribute values are used */
                    if ((attr->children != NULL) &&
                        (attr->children->type == XML_TEXT_NODE) &&
                        (attr->children->next == NULL)) {
                        value = attr->children->content;
                        if ((!xmlStrcasecmp(attr->name, BAD_CAST "http-equiv"))
                            && (!xmlStrcasecmp(value, BAD_CAST "Content-Type")))
                            http = 1;
                        else if ((value != NULL) &&
                                 (!xmlStrcasecmp(attr->name,
                                                 BAD_CAST "content")))
                            content = value;
                        if ((http != 0) && (content != NULL))
                            break;
                    }
                    attr = attr->next;
                }
                if ((http != 0) && (content != NULL)) {
                    meta = cur;
                    break;
                }
            }
        }
        cur = cur->next;
    }

create:
    if (meta == NULL) {
        if ((encoding != NULL) && (head != NULL)) {
            /*
             * Create a new Meta element with the right attributes
             */
            meta = xmlNewDocNode(doc, NULL, BAD_CAST "meta", NULL);
            if (meta == NULL)
                return(-1);
            if (head->children == NULL)
                xmlAddChild(head, meta);
            else
                xmlAddPrevSibling(head->children, meta);
            if ((xmlNewProp(meta, BAD_CAST "http-equiv",
                            BAD_CAST "Content-Type") == NULL) ||
                (xmlNewProp(meta, BAD_CAST "content",
                            BAD_CAST newcontent) == NULL)) {
                /* do not leave a half-built meta element in the tree */
                xmlUnlinkNode(meta);
                xmlFreeNode(meta);
                return(-1);
            }
        }
    } else {
        /* remove the meta tag if NULL is passed */
        if (encoding == NULL) {
            xmlUnlinkNode(meta);
            xmlFreeNode(meta);
        }
        /* change the document only if there is a real encoding change */
        else if (xmlStrcasestr(content, encoding) == NULL) {
            xmlSetProp(meta, BAD_CAST "content", BAD_CAST newcontent);
        }
    }

    return(0);
}
2002-08-12 17:27:28 +04:00
/**
* htmlBooleanAttrs :
*
* These are the HTML attributes which will be output
* in minimized form , i . e . < option selected = " selected " > will be
* output as < option selected > , as per XSLT 1.0 16.2 " HTML Output Method "
*
*/
2016-05-21 01:21:43 +03:00
/* NULL-terminated: htmlIsBooleanAttr scans until it reaches the NULL entry. */
static const char * const htmlBooleanAttrs [ ] = {
2002-08-12 17:27:28 +04:00
" checked " , " compact " , " declare " , " defer " , " disabled " , " ismap " ,
" multiple " , " nohref " , " noresize " , " noshade " , " nowrap " , " readonly " ,
" selected " , NULL
} ;
/**
* htmlIsBooleanAttr :
* @ name : the name of the attribute to check
*
* Determine if a given attribute is a boolean attribute .
2012-09-11 09:26:36 +04:00
*
2002-08-12 17:27:28 +04:00
* returns : false if the attribute is not boolean , true otherwise .
*/
int
htmlIsBooleanAttr(const xmlChar *name)
{
    const char *const *entry;

    /* Linear scan of the NULL-terminated table, compared with xmlStrcasecmp. */
    for (entry = htmlBooleanAttrs; *entry != NULL; entry++) {
        if (xmlStrcasecmp((const xmlChar *) *entry, name) == 0)
            return 1;
    }

    return 0;
}
2003-09-29 17:20:24 +04:00
# ifdef LIBXML_OUTPUT_ENABLED
2008-09-01 17:08:57 +04:00
/*
* private routine exported from xmlIO . c
*/
xmlOutputBufferPtr
xmlAllocOutputBufferInternal ( xmlCharEncodingHandlerPtr encoder ) ;
2003-10-09 17:14:55 +04:00
/************************************************************************
* *
2012-09-11 09:26:36 +04:00
* Output error handlers *
2003-10-09 17:14:55 +04:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* htmlSaveErrMemory :
2020-03-08 19:19:42 +03:00
* @ extra : extra information
2003-10-09 17:14:55 +04:00
*
* Handle an out of memory condition
*/
static void
htmlSaveErrMemory ( const char * extra )
{
/* Reports XML_ERR_NO_MEMORY in the XML_FROM_OUTPUT domain; extra is passed through unchanged. */
__xmlSimpleError ( XML_FROM_OUTPUT , XML_ERR_NO_MEMORY , NULL , NULL , extra ) ;
}
/**
* htmlSaveErr :
* @ code : the error number
* @ node : the location of the error .
2020-03-08 19:19:42 +03:00
* @ extra : extra information
2003-10-09 17:14:55 +04:00
*
* Handle a save ( serialization ) error : map @code to a message and report it
*/
static void
htmlSaveErr ( int code , xmlNodePtr node , const char * extra )
{
/*
 * Picks a fixed message for the given save error code and forwards it,
 * together with node and extra, to __xmlSimpleError in the
 * XML_FROM_OUTPUT domain.
 */
const char * msg = NULL ;
switch ( code ) {
case XML_SAVE_NOT_UTF8 :
2006-08-16 03:14:24 +04:00
msg = " string is not in UTF-8 \n " ;
2003-10-09 17:14:55 +04:00
break ;
case XML_SAVE_CHAR_INVALID :
2006-08-16 03:14:24 +04:00
msg = " invalid character value \n " ;
2003-10-09 17:14:55 +04:00
break ;
case XML_SAVE_UNKNOWN_ENCODING :
2006-08-16 03:14:24 +04:00
/* The only message with a %s placeholder; presumably filled from extra -- confirm in __xmlSimpleError. */
msg = " unknown encoding %s \n " ;
2003-10-09 17:14:55 +04:00
break ;
case XML_SAVE_NO_DOCTYPE :
2006-08-16 03:14:24 +04:00
msg = " HTML has no DOCTYPE \n " ;
2003-10-09 17:14:55 +04:00
break ;
default :
2006-08-16 03:14:24 +04:00
/* Any code not listed above gets a generic message. */
msg = " unexpected error number \n " ;
2003-10-09 17:14:55 +04:00
}
__xmlSimpleError ( XML_FROM_OUTPUT , code , node , msg , extra ) ;
}
2001-02-23 20:55:21 +03:00
/************************************************************************
* *
2012-09-11 09:26:36 +04:00
* Dumping HTML tree content to a simple buffer *
2001-02-23 20:55:21 +03:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
2012-07-16 10:58:02 +04:00
* htmlBufNodeDumpFormat :
* @ buf : the xmlBufPtr output
2001-02-23 20:55:21 +03:00
* @ doc : the document
* @ cur : the current node
2001-06-13 17:48:46 +04:00
* @ format : should formatting spaces been added
2001-02-23 20:55:21 +03:00
*
* Dump an HTML node , recursive behaviour , children are printed too .
2002-11-27 22:39:27 +03:00
*
* Returns the number of byte written or - 1 in case of error
2001-02-23 20:55:21 +03:00
*/
2012-07-16 10:58:02 +04:00
static size_t
htmlBufNodeDumpFormat ( xmlBufPtr buf , xmlDocPtr doc , xmlNodePtr cur ,
2001-06-13 17:48:46 +04:00
int format ) {
2012-07-16 10:58:02 +04:00
size_t use ;
2002-11-27 22:39:27 +03:00
int ret ;
xmlOutputBufferPtr outbuf ;
2001-02-23 20:55:21 +03:00
if ( cur = = NULL ) {
2002-11-27 22:39:27 +03:00
return ( - 1 ) ;
}
if ( buf = = NULL ) {
return ( - 1 ) ;
}
outbuf = ( xmlOutputBufferPtr ) xmlMalloc ( sizeof ( xmlOutputBuffer ) ) ;
if ( outbuf = = NULL ) {
2003-10-09 17:14:55 +04:00
htmlSaveErrMemory ( " allocating HTML output buffer " ) ;
2002-11-27 22:39:27 +03:00
return ( - 1 ) ;
}
memset ( outbuf , 0 , ( size_t ) sizeof ( xmlOutputBuffer ) ) ;
outbuf - > buffer = buf ;
outbuf - > encoder = NULL ;
outbuf - > writecallback = NULL ;
outbuf - > closecallback = NULL ;
outbuf - > context = NULL ;
outbuf - > written = 0 ;
2012-07-16 10:58:02 +04:00
use = xmlBufUse ( buf ) ;
2002-11-27 22:39:27 +03:00
htmlNodeDumpFormatOutput ( outbuf , doc , cur , NULL , format ) ;
xmlFree ( outbuf ) ;
2012-07-16 10:58:02 +04:00
ret = xmlBufUse ( buf ) - use ;
2002-11-27 22:39:27 +03:00
return ( ret ) ;
2001-02-23 20:55:21 +03:00
}
/**
2001-06-13 17:48:46 +04:00
* htmlNodeDump :
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the current node
*
* Dump an HTML node , recursive behaviour , children are printed too ,
* and formatting returns are added .
2002-11-27 22:39:27 +03:00
*
* Returns the number of bytes written or - 1 in case of error
2001-06-13 17:48:46 +04:00
*/
2002-11-27 22:39:27 +03:00
int
2001-06-13 17:48:46 +04:00
htmlNodeDump ( xmlBufferPtr buf , xmlDocPtr doc , xmlNodePtr cur ) {
2012-07-16 10:58:02 +04:00
xmlBufPtr buffer ;
size_t ret ;
if ( ( buf = = NULL ) | | ( cur = = NULL ) )
return ( - 1 ) ;
2003-08-08 18:00:28 +04:00
xmlInitParser ( ) ;
2012-07-16 10:58:02 +04:00
buffer = xmlBufFromBuffer ( buf ) ;
if ( buffer = = NULL )
return ( - 1 ) ;
ret = htmlBufNodeDumpFormat ( buffer , doc , cur , 1 ) ;
2003-08-08 18:00:28 +04:00
2012-07-16 10:58:02 +04:00
xmlBufBackToBuffer ( buffer ) ;
if ( ret > INT_MAX )
return ( - 1 ) ;
return ( ( int ) ret ) ;
2001-06-13 17:48:46 +04:00
}
/**
* htmlNodeDumpFileFormat :
2001-02-23 20:55:21 +03:00
* @ out : the FILE pointer
* @ doc : the document
* @ cur : the current node
2001-06-13 17:48:46 +04:00
* @ encoding : the document encoding
* @ format : should formatting spaces been added
2001-02-23 20:55:21 +03:00
*
* Dump an HTML node , recursive behaviour , children are printed too .
2001-06-13 17:48:46 +04:00
*
2001-06-14 15:11:59 +04:00
* TODO : if encoding = = NULL try to save in the doc encoding
*
* returns : the number of byte written or - 1 in case of failure .
2001-02-23 20:55:21 +03:00
*/
2001-06-14 15:11:59 +04:00
int
htmlNodeDumpFileFormat ( FILE * out , xmlDocPtr doc ,
xmlNodePtr cur , const char * encoding , int format ) {
xmlOutputBufferPtr buf ;
xmlCharEncodingHandlerPtr handler = NULL ;
int ret ;
2003-08-08 18:00:28 +04:00
xmlInitParser ( ) ;
2001-06-14 15:11:59 +04:00
if ( encoding ! = NULL ) {
xmlCharEncoding enc ;
enc = xmlParseCharEncoding ( encoding ) ;
if ( enc ! = XML_CHAR_ENCODING_UTF8 ) {
handler = xmlFindCharEncodingHandler ( encoding ) ;
if ( handler = = NULL )
2011-05-16 12:03:50 +04:00
htmlSaveErr ( XML_SAVE_UNKNOWN_ENCODING , NULL , encoding ) ;
2001-06-14 15:11:59 +04:00
}
2018-10-13 17:12:14 +03:00
} else {
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if ( handler = = NULL )
handler = xmlFindCharEncodingHandler ( " HTML " ) ;
if ( handler = = NULL )
handler = xmlFindCharEncodingHandler ( " ascii " ) ;
2001-06-14 15:11:59 +04:00
}
2012-09-11 09:26:36 +04:00
/*
2001-06-14 15:11:59 +04:00
* save the content to a temp buffer .
*/
buf = xmlOutputBufferCreateFile ( out , handler ) ;
if ( buf = = NULL ) return ( 0 ) ;
2021-02-07 15:38:01 +03:00
htmlNodeDumpFormatOutput ( buf , doc , cur , NULL , format ) ;
2001-06-14 15:11:59 +04:00
ret = xmlOutputBufferClose ( buf ) ;
return ( ret ) ;
2001-02-23 20:55:21 +03:00
}
2001-06-13 17:48:46 +04:00
/**
* htmlNodeDumpFile :
* @ out : the FILE pointer
* @ doc : the document
* @ cur : the current node
*
* Dump an HTML node , recursive behaviour , children are printed too ,
* and formatting returns are added .
*/
void
htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
    /* No explicit encoding, formatting enabled; the byte count is dropped. */
    (void) htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
}
2001-02-23 20:55:21 +03:00
/**
2005-12-20 18:55:14 +03:00
* htmlDocDumpMemoryFormat :
2001-02-23 20:55:21 +03:00
* @ cur : the document
* @ mem : OUT : the memory pointer
2001-05-30 22:32:34 +04:00
* @ size : OUT : the memory length
2005-12-20 18:55:14 +03:00
* @ format : should formatting spaces been added
2001-02-23 20:55:21 +03:00
*
* Dump an HTML document in memory and return the xmlChar * and it ' s size .
* It ' s up to the caller to free the memory .
*/
void
2005-12-20 18:55:14 +03:00
htmlDocDumpMemoryFormat ( xmlDocPtr cur , xmlChar * * mem , int * size , int format ) {
2001-05-30 22:32:34 +04:00
xmlOutputBufferPtr buf ;
xmlCharEncodingHandlerPtr handler = NULL ;
const char * encoding ;
2001-02-23 20:55:21 +03:00
2003-08-08 18:00:28 +04:00
xmlInitParser ( ) ;
2004-11-06 22:24:28 +03:00
if ( ( mem = = NULL ) | | ( size = = NULL ) )
return ;
2001-02-23 20:55:21 +03:00
if ( cur = = NULL ) {
* mem = NULL ;
* size = 0 ;
return ;
}
2001-05-30 22:32:34 +04:00
encoding = ( const char * ) htmlGetMetaEncoding ( cur ) ;
if ( encoding ! = NULL ) {
xmlCharEncoding enc ;
enc = xmlParseCharEncoding ( encoding ) ;
2018-10-13 16:23:35 +03:00
if ( enc ! = XML_CHAR_ENCODING_UTF8 ) {
2001-05-30 22:32:34 +04:00
handler = xmlFindCharEncodingHandler ( encoding ) ;
2011-05-16 12:03:50 +04:00
if ( handler = = NULL )
htmlSaveErr ( XML_SAVE_UNKNOWN_ENCODING , NULL , encoding ) ;
2001-05-30 22:32:34 +04:00
}
2018-10-13 17:12:14 +03:00
} else {
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if ( handler = = NULL )
handler = xmlFindCharEncodingHandler ( " HTML " ) ;
if ( handler = = NULL )
handler = xmlFindCharEncodingHandler ( " ascii " ) ;
2001-05-30 22:32:34 +04:00
}
2008-09-01 17:08:57 +04:00
buf = xmlAllocOutputBufferInternal ( handler ) ;
2001-02-23 20:55:21 +03:00
if ( buf = = NULL ) {
* mem = NULL ;
* size = 0 ;
return ;
}
2001-05-30 22:32:34 +04:00
2011-05-16 12:03:50 +04:00
htmlDocContentDumpFormatOutput ( buf , cur , NULL , format ) ;
2005-12-20 18:55:14 +03:00
2001-05-30 22:32:34 +04:00
xmlOutputBufferFlush ( buf ) ;
if ( buf - > conv ! = NULL ) {
2012-07-16 10:58:02 +04:00
* size = xmlBufUse ( buf - > conv ) ;
* mem = xmlStrndup ( xmlBufContent ( buf - > conv ) , * size ) ;
2001-05-30 22:32:34 +04:00
} else {
2012-07-16 10:58:02 +04:00
* size = xmlBufUse ( buf - > buffer ) ;
* mem = xmlStrndup ( xmlBufContent ( buf - > buffer ) , * size ) ;
2001-05-30 22:32:34 +04:00
}
( void ) xmlOutputBufferClose ( buf ) ;
2001-02-23 20:55:21 +03:00
}
2005-12-20 18:55:14 +03:00
/**
* htmlDocDumpMemory :
* @ cur : the document
* @ mem : OUT : the memory pointer
* @ size : OUT : the memory length
*
* Dump an HTML document in memory and return the xmlChar * and its size .
* It ' s up to the caller to free the memory .
*/
void
htmlDocDumpMemory(xmlDocPtr cur, xmlChar **mem, int *size) {
    /* Same as the Format variant with formatting switched on. */
    htmlDocDumpMemoryFormat(cur, mem, size, 1);
}
2001-02-23 20:55:21 +03:00
/************************************************************************
* *
2012-09-11 09:26:36 +04:00
* Dumping HTML tree content to an I / O output buffer *
2001-02-23 20:55:21 +03:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2003-01-09 16:19:33 +03:00
void xmlNsListDumpOutput ( xmlOutputBufferPtr buf , xmlNsPtr cur ) ;
2002-08-12 17:27:28 +04:00
2001-02-23 20:55:21 +03:00
/**
2001-06-13 11:45:41 +04:00
* htmlDtdDumpOutput :
2001-02-23 20:55:21 +03:00
* @ buf : the HTML buffer output
* @ doc : the document
* @ encoding : the encoding string
2012-09-11 09:26:36 +04:00
*
2001-03-24 20:00:36 +03:00
* TODO : check whether encoding is needed
*
2001-02-23 20:55:21 +03:00
* Dump the HTML document DTD , if any .
*/
static void
2001-03-24 20:00:36 +03:00
htmlDtdDumpOutput ( xmlOutputBufferPtr buf , xmlDocPtr doc ,
2001-03-26 20:28:29 +04:00
const char * encoding ATTRIBUTE_UNUSED ) {
2001-02-23 20:55:21 +03:00
/* doc is dereferenced without a NULL check; the caller must pass a valid document. */
xmlDtdPtr cur = doc - > intSubset ;
if ( cur = = NULL ) {
2003-10-09 17:14:55 +04:00
/* No internal subset: report XML_SAVE_NO_DOCTYPE and write nothing. */
htmlSaveErr ( XML_SAVE_NO_DOCTYPE , ( xmlNodePtr ) doc , NULL ) ;
2001-02-23 20:55:21 +03:00
return ;
}
xmlOutputBufferWriteString ( buf , " <!DOCTYPE " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
if ( cur - > ExternalID ! = NULL ) {
/* Public identifier present: write it, then the system identifier if any. */
xmlOutputBufferWriteString ( buf , " PUBLIC " ) ;
2012-07-16 10:58:02 +04:00
/* Quoted strings are appended straight to buf->buffer rather than through the xmlOutputBufferWrite* calls. */
xmlBufWriteQuotedString ( buf - > buffer , cur - > ExternalID ) ;
2001-02-23 20:55:21 +03:00
if ( cur - > SystemID ! = NULL ) {
xmlOutputBufferWriteString ( buf , " " ) ;
2012-07-16 10:58:02 +04:00
xmlBufWriteQuotedString ( buf - > buffer , cur - > SystemID ) ;
2012-09-11 09:26:36 +04:00
}
2015-04-03 17:52:36 +03:00
/* xmlStrcmp is non-zero when the strings differ, so a system identifier equal to about:legacy-compat is not written. */
} else if ( cur - > SystemID ! = NULL & &
xmlStrcmp ( cur - > SystemID , BAD_CAST " about:legacy-compat " ) ) {
2001-02-23 20:55:21 +03:00
xmlOutputBufferWriteString ( buf , " SYSTEM " ) ;
2012-07-16 10:58:02 +04:00
xmlBufWriteQuotedString ( buf - > buffer , cur - > SystemID ) ;
2001-02-23 20:55:21 +03:00
}
xmlOutputBufferWriteString ( buf , " > \n " ) ;
}
/**
2001-06-13 11:45:41 +04:00
* htmlAttrDumpOutput :
2001-02-23 20:55:21 +03:00
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the attribute pointer
*
* Dump an HTML attribute
*/
static void
2021-02-07 15:38:01 +03:00
htmlAttrDumpOutput ( xmlOutputBufferPtr buf , xmlDocPtr doc , xmlAttrPtr cur ) {
2001-02-23 20:55:21 +03:00
xmlChar * value ;
2001-06-13 11:45:41 +04:00
/*
2012-09-05 07:45:32 +04:00
* The html output method should not escape a & character
* occurring in an attribute value immediately followed by
* a { character ( see Section B .7 .1 of the HTML 4.0 Recommendation ) .
* This is implemented in xmlEncodeEntitiesReentrant
2001-06-13 11:45:41 +04:00
*/
2001-02-23 20:55:21 +03:00
if ( cur = = NULL ) {
return ;
}
/* Separator before the attribute, then the optional namespace prefix and the name. */
xmlOutputBufferWriteString ( buf , " " ) ;
2003-09-15 08:58:14 +04:00
if ( ( cur - > ns ! = NULL ) & & ( cur - > ns - > prefix ! = NULL ) ) {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > ns - > prefix ) ;
xmlOutputBufferWriteString ( buf , " : " ) ;
}
2001-02-23 20:55:21 +03:00
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
2002-08-12 17:27:28 +04:00
/* Attributes without children and boolean attributes are written as a bare name. */
if ( ( cur - > children ! = NULL ) & & ( ! htmlIsBooleanAttr ( cur - > name ) ) ) {
2001-02-23 20:55:21 +03:00
/* value is allocated here and released with xmlFree below. */
value = xmlNodeListGetString ( doc , cur - > children , 0 ) ;
if ( value ) {
xmlOutputBufferWriteString ( buf , " = " ) ;
2003-03-27 17:08:24 +03:00
/*
 * href, action and src, plus name when the parent element is a, get
 * URI escaping, but only when neither the attribute nor its parent
 * element is in a namespace.
 */
if ( ( cur - > ns = = NULL ) & & ( cur - > parent ! = NULL ) & &
( cur - > parent - > ns = = NULL ) & &
( ( ! xmlStrcasecmp ( cur - > name , BAD_CAST " href " ) ) | |
( ! xmlStrcasecmp ( cur - > name , BAD_CAST " action " ) ) | |
2005-03-30 00:30:17 +04:00
( ! xmlStrcasecmp ( cur - > name , BAD_CAST " src " ) ) | |
( ( ! xmlStrcasecmp ( cur - > name , BAD_CAST " name " ) ) & &
( ! xmlStrcasecmp ( cur - > parent - > name , BAD_CAST " a " ) ) ) ) ) {
2020-08-15 19:32:29 +03:00
xmlChar * escaped ;
2002-04-15 02:00:22 +04:00
xmlChar * tmp = value ;
2003-10-18 20:20:14 +04:00
/* Leading blanks are skipped before escaping. */
while ( IS_BLANK_CH ( * tmp ) ) tmp + + ;
2002-04-15 02:00:22 +04:00
2020-08-15 19:32:29 +03:00
/*
* the < and > have already been escaped at the entity level
* And doing so here breaks server side includes
*/
escaped = xmlURIEscapeStr ( tmp , BAD_CAST " @/:=?;#%&,+<> " ) ;
if ( escaped ! = NULL ) {
xmlBufWriteQuotedString ( buf - > buffer , escaped ) ;
xmlFree ( escaped ) ;
} else {
/* Escaping failed: the original value is written, leading blanks included (value, not tmp). */
xmlBufWriteQuotedString ( buf - > buffer , value ) ;
2002-04-15 02:00:22 +04:00
}
} else {
2012-07-16 10:58:02 +04:00
xmlBufWriteQuotedString ( buf - > buffer , value ) ;
2002-04-15 02:00:22 +04:00
}
2001-02-23 20:55:21 +03:00
xmlFree ( value ) ;
} else {
/* No string could be built from the children: emit an empty quoted value. */
xmlOutputBufferWriteString ( buf , " = \" \" " ) ;
}
}
}
/**
2001-06-13 17:48:46 +04:00
* htmlNodeDumpFormatOutput :
2001-02-23 20:55:21 +03:00
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the current node
2021-02-07 15:38:01 +03:00
* @ encoding : the encoding string ( unused )
2001-06-13 17:48:46 +04:00
* @ format : should formatting spaces be added
2001-02-23 20:55:21 +03:00
*
* Dump an HTML node , recursive behaviour , children are printed too .
*/
void
2001-06-13 17:48:46 +04:00
htmlNodeDumpFormatOutput ( xmlOutputBufferPtr buf , xmlDocPtr doc ,
2021-02-07 15:38:01 +03:00
xmlNodePtr cur , const char * encoding ATTRIBUTE_UNUSED ,
int format ) {
2021-05-18 21:08:28 +03:00
/*
 * Loop-based depth-first walk of the subtree rooted at cur. root remembers
 * the starting node so the ascent stops there. parent tracks the node the
 * walk descended from instead of trusting cur->parent on the way back up.
 */
xmlNodePtr root , parent ;
2020-07-28 03:42:37 +03:00
xmlAttrPtr attr ;
2001-08-17 03:26:59 +04:00
const htmlElemDesc * info ;
2001-02-23 20:55:21 +03:00
2003-08-08 18:00:28 +04:00
xmlInitParser ( ) ;
2004-11-05 13:03:46 +03:00
if ( ( cur = = NULL ) | | ( buf = = NULL ) ) {
2001-02-23 20:55:21 +03:00
return ;
}
2020-07-28 03:42:37 +03:00
root = cur ;
2021-05-18 21:08:28 +03:00
parent = cur - > parent ;
2020-07-28 03:42:37 +03:00
while ( 1 ) {
/* First half of each iteration: emit the opening part of cur. */
switch ( cur - > type ) {
case XML_HTML_DOCUMENT_NODE :
case XML_DOCUMENT_NODE :
if ( ( ( xmlDocPtr ) cur ) - > intSubset ! = NULL ) {
htmlDtdDumpOutput ( buf , ( xmlDocPtr ) cur , NULL ) ;
}
2021-06-07 16:09:53 +03:00
if ( cur - > children ! = NULL ) {
/* Always validate cur->parent when descending. */
if ( cur - > parent = = parent ) {
parent = cur ;
cur = cur - > children ;
continue ;
}
} else {
xmlOutputBufferWriteString ( buf , " \n " ) ;
2020-07-28 03:42:37 +03:00
}
break ;
2001-02-23 20:55:21 +03:00
2020-07-28 03:42:37 +03:00
case XML_ELEMENT_NODE :
2021-05-18 21:08:28 +03:00
/*
* Some users like lxml are known to pass nodes with a corrupted
* tree structure . Fall back to a recursive call to handle this
* case .
*/
if ( ( cur - > parent ! = parent ) & & ( cur - > children ! = NULL ) ) {
htmlNodeDumpFormatOutput ( buf , doc , cur , encoding , format ) ;
break ;
}
2020-07-28 03:42:37 +03:00
/*
* Get specific HTML info for that node .
* Namespaced elements get no HTML element description.
*/
if ( cur - > ns = = NULL )
info = htmlTagLookup ( cur - > name ) ;
else
info = NULL ;
xmlOutputBufferWriteString ( buf , " < " ) ;
2003-04-11 01:40:49 +04:00
if ( ( cur - > ns ! = NULL ) & & ( cur - > ns - > prefix ! = NULL ) ) {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > ns - > prefix ) ;
xmlOutputBufferWriteString ( buf , " : " ) ;
}
2020-07-28 03:42:37 +03:00
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
if ( cur - > nsDef )
xmlNsListDumpOutput ( buf , cur - > nsDef ) ;
attr = cur - > properties ;
while ( attr ! = NULL ) {
2021-02-07 15:38:01 +03:00
htmlAttrDumpOutput ( buf , doc , attr ) ;
2020-07-28 03:42:37 +03:00
attr = attr - > next ;
}
/*
 * Three cases: an element flagged empty in its description gets no end
 * tag; a childless element gets its end tag immediately, unless the
 * saveEndTag test below suppresses it; an element with children is
 * descended into and closed during the ascent further down.
 */
if ( ( info ! = NULL ) & & ( info - > empty ) ) {
xmlOutputBufferWriteString ( buf , " > " ) ;
} else if ( cur - > children = = NULL ) {
if ( ( info ! = NULL ) & & ( info - > saveEndTag ! = 0 ) & &
( xmlStrcmp ( BAD_CAST info - > name , BAD_CAST " html " ) ) & &
( xmlStrcmp ( BAD_CAST info - > name , BAD_CAST " body " ) ) ) {
xmlOutputBufferWriteString ( buf , " > " ) ;
} else {
xmlOutputBufferWriteString ( buf , " ></ " ) ;
if ( ( cur - > ns ! = NULL ) & & ( cur - > ns - > prefix ! = NULL ) ) {
xmlOutputBufferWriteString ( buf ,
( const char * ) cur - > ns - > prefix ) ;
xmlOutputBufferWriteString ( buf , " : " ) ;
}
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
xmlOutputBufferWriteString ( buf , " > " ) ;
}
} else {
xmlOutputBufferWriteString ( buf , " > " ) ;
/* Formatting newline after the start tag of a non-inline element, skipped for text-like first children, single children and names starting with p. */
if ( ( format ) & & ( info ! = NULL ) & & ( ! info - > isinline ) & &
( cur - > children - > type ! = HTML_TEXT_NODE ) & &
( cur - > children - > type ! = HTML_ENTITY_REF_NODE ) & &
( cur - > children ! = cur - > last ) & &
( cur - > name ! = NULL ) & &
( cur - > name [ 0 ] ! = ' p ' ) ) /* p, pre, param */
xmlOutputBufferWriteString ( buf , " \n " ) ;
2021-05-18 21:08:28 +03:00
parent = cur ;
2020-07-28 03:42:37 +03:00
cur = cur - > children ;
continue ;
}
/* Formatting newline after a childless non-inline element that has a following sibling. */
if ( ( format ) & & ( cur - > next ! = NULL ) & &
( info ! = NULL ) & & ( ! info - > isinline ) ) {
if ( ( cur - > next - > type ! = HTML_TEXT_NODE ) & &
( cur - > next - > type ! = HTML_ENTITY_REF_NODE ) & &
2021-05-18 21:08:28 +03:00
( parent ! = NULL ) & &
( parent - > name ! = NULL ) & &
( parent - > name [ 0 ] ! = ' p ' ) ) /* p, pre, param */
2020-07-28 03:42:37 +03:00
xmlOutputBufferWriteString ( buf , " \n " ) ;
}
break ;
case XML_ATTRIBUTE_NODE :
2021-02-07 15:38:01 +03:00
htmlAttrDumpOutput ( buf , doc , ( xmlAttrPtr ) cur ) ;
2020-07-28 03:42:37 +03:00
break ;
case HTML_TEXT_NODE :
if ( cur - > content = = NULL )
break ;
/*
 * Text is entity-encoded unless the node is named xmlStringTextNoenc or
 * its parent is a script or style element; in those cases the content
 * is written as is.
 */
if ( ( ( cur - > name = = ( const xmlChar * ) xmlStringText ) | |
( cur - > name ! = ( const xmlChar * ) xmlStringTextNoenc ) ) & &
2021-05-18 21:08:28 +03:00
( ( parent = = NULL ) | |
( ( xmlStrcasecmp ( parent - > name , BAD_CAST " script " ) ) & &
( xmlStrcasecmp ( parent - > name , BAD_CAST " style " ) ) ) ) ) {
2020-07-28 03:42:37 +03:00
xmlChar * buffer ;
buffer = xmlEncodeEntitiesReentrant ( doc , cur - > content ) ;
if ( buffer ! = NULL ) {
xmlOutputBufferWriteString ( buf , ( const char * ) buffer ) ;
xmlFree ( buffer ) ;
}
} else {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > content ) ;
}
break ;
case HTML_COMMENT_NODE :
if ( cur - > content ! = NULL ) {
xmlOutputBufferWriteString ( buf , " <!-- " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > content ) ;
xmlOutputBufferWriteString ( buf , " --> " ) ;
}
break ;
case HTML_PI_NODE :
if ( cur - > name ! = NULL ) {
xmlOutputBufferWriteString ( buf , " <? " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
if ( cur - > content ! = NULL ) {
xmlOutputBufferWriteString ( buf , " " ) ;
xmlOutputBufferWriteString ( buf ,
( const char * ) cur - > content ) ;
}
xmlOutputBufferWriteString ( buf , " > " ) ;
}
break ;
case HTML_ENTITY_REF_NODE :
xmlOutputBufferWriteString ( buf , " & " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
xmlOutputBufferWriteString ( buf , " ; " ) ;
break ;
case HTML_PRESERVE_NODE :
if ( cur - > content ! = NULL ) {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > content ) ;
}
break ;
default :
break ;
}
/*
 * Second half: cur is finished. Move to its next sibling if there is one;
 * otherwise climb to the parent, close it, and try again. Reaching root
 * ends the dump.
 */
while ( 1 ) {
if ( cur = = root )
return ;
if ( cur - > next ! = NULL ) {
cur = cur - > next ;
break ;
}
2021-05-18 21:08:28 +03:00
cur = parent ;
/* cur->parent was validated when descending. */
parent = cur - > parent ;
2020-07-28 03:42:37 +03:00
if ( ( cur - > type = = XML_HTML_DOCUMENT_NODE ) | |
( cur - > type = = XML_DOCUMENT_NODE ) ) {
xmlOutputBufferWriteString ( buf , " \n " ) ;
} else {
/* The element description is only looked up when formatting, since it is only used for the newline decisions below. */
if ( ( format ) & & ( cur - > ns = = NULL ) )
info = htmlTagLookup ( cur - > name ) ;
else
info = NULL ;
/* cur was descended into, so it has children; cur->last is read here on that basis. */
if ( ( format ) & & ( info ! = NULL ) & & ( ! info - > isinline ) & &
( cur - > last - > type ! = HTML_TEXT_NODE ) & &
( cur - > last - > type ! = HTML_ENTITY_REF_NODE ) & &
( cur - > children ! = cur - > last ) & &
( cur - > name ! = NULL ) & &
( cur - > name [ 0 ] ! = ' p ' ) ) /* p, pre, param */
xmlOutputBufferWriteString ( buf , " \n " ) ;
xmlOutputBufferWriteString ( buf , " </ " ) ;
if ( ( cur - > ns ! = NULL ) & & ( cur - > ns - > prefix ! = NULL ) ) {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > ns - > prefix ) ;
xmlOutputBufferWriteString ( buf , " : " ) ;
}
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
xmlOutputBufferWriteString ( buf , " > " ) ;
if ( ( format ) & & ( info ! = NULL ) & & ( ! info - > isinline ) & &
( cur - > next ! = NULL ) ) {
if ( ( cur - > next - > type ! = HTML_TEXT_NODE ) & &
( cur - > next - > type ! = HTML_ENTITY_REF_NODE ) & &
2021-05-18 21:08:28 +03:00
( parent ! = NULL ) & &
( parent - > name ! = NULL ) & &
( parent - > name [ 0 ] ! = ' p ' ) ) /* p, pre, param */
2020-07-28 03:42:37 +03:00
xmlOutputBufferWriteString ( buf , " \n " ) ;
}
}
}
2001-02-23 20:55:21 +03:00
}
}
/**
2001-06-13 17:48:46 +04:00
* htmlNodeDumpOutput :
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the current node
2021-02-07 15:38:01 +03:00
* @ encoding : the encoding string ( unused )
2001-06-13 17:48:46 +04:00
*
* Dump an HTML node , recursive behaviour , children are printed too ,
* and formatting returns / spaces are added .
*/
void
htmlNodeDumpOutput ( xmlOutputBufferPtr buf , xmlDocPtr doc ,
2021-02-07 15:38:01 +03:00
xmlNodePtr cur , const char * encoding ATTRIBUTE_UNUSED ) {
htmlNodeDumpFormatOutput ( buf , doc , cur , NULL , 1 ) ;
2001-06-13 17:48:46 +04:00
}
/**
* htmlDocContentDumpFormatOutput :
2001-02-23 20:55:21 +03:00
* @ buf : the HTML buffer output
* @ cur : the document
2021-02-07 15:38:01 +03:00
* @ encoding : the encoding string ( unused )
2002-01-22 21:15:52 +03:00
* @ format : should formatting spaces be added
2001-02-23 20:55:21 +03:00
*
* Dump an HTML document .
*/
void
2001-06-13 17:48:46 +04:00
htmlDocContentDumpFormatOutput ( xmlOutputBufferPtr buf , xmlDocPtr cur ,
2021-02-07 15:38:01 +03:00
const char * encoding ATTRIBUTE_UNUSED ,
int format ) {
2022-05-14 18:48:01 +03:00
int type = 0 ;
if ( cur ) {
type = cur - > type ;
cur - > type = XML_HTML_DOCUMENT_NODE ;
}
2021-02-07 15:38:01 +03:00
htmlNodeDumpFormatOutput ( buf , cur , ( xmlNodePtr ) cur , NULL , format ) ;
2022-05-14 18:48:01 +03:00
if ( cur )
cur - > type = ( xmlElementType ) type ;
2001-02-23 20:55:21 +03:00
}
2001-06-13 17:48:46 +04:00
/**
* htmlDocContentDumpOutput :
* @ buf : the HTML buffer output
* @ cur : the document
2021-02-07 15:38:01 +03:00
* @ encoding : the encoding string ( unused )
2001-06-13 17:48:46 +04:00
*
2019-09-30 18:04:54 +03:00
* Dump an HTML document . Formatting return / spaces are added .
2001-06-13 17:48:46 +04:00
*/
void
htmlDocContentDumpOutput ( xmlOutputBufferPtr buf , xmlDocPtr cur ,
2021-02-07 15:38:01 +03:00
const char * encoding ATTRIBUTE_UNUSED ) {
htmlNodeDumpFormatOutput ( buf , cur , ( xmlNodePtr ) cur , NULL , 1 ) ;
2001-06-13 17:48:46 +04:00
}
2001-02-23 20:55:21 +03:00
/************************************************************************
* *
* Saving functions front - ends *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * htmlDocDump:
 * @f:  the FILE*
 * @cur:  the document
 *
 * Dump an HTML document to an open FILE.
 *
 * The output encoder is chosen from the encoding declared in the
 * document's meta tags (see htmlGetMetaEncoding()):
 *  - declared and UTF-8: no encoder is used;
 *  - declared and not UTF-8: the matching handler is looked up, and
 *    XML_SAVE_UNKNOWN_ENCODING is reported if none is found (the dump
 *    then proceeds without an encoder);
 *  - not declared: the "HTML" handler is used, or "ascii" if that one
 *    is unavailable.
 *
 * returns: the number of bytes written or -1 in case of failure.
 */
int
htmlDocDump(FILE *f, xmlDocPtr cur) {
    xmlCharEncodingHandlerPtr encoder = NULL;
    xmlOutputBufferPtr out;
    const char *metaEnc;

    xmlInitParser();

    if ((f == NULL) || (cur == NULL))
        return(-1);

    metaEnc = (const char *) htmlGetMetaEncoding(cur);
    if (metaEnc == NULL) {
        /*
         * Fallback to HTML or ASCII when the encoding is unspecified
         */
        encoder = xmlFindCharEncodingHandler("HTML");
        if (encoder == NULL)
            encoder = xmlFindCharEncodingHandler("ascii");
    } else if (xmlParseCharEncoding(metaEnc) != XML_CHAR_ENCODING_UTF8) {
        encoder = xmlFindCharEncodingHandler(metaEnc);
        if (encoder == NULL)
            htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, metaEnc);
    }

    out = xmlOutputBufferCreateFile(f, encoder);
    if (out == NULL)
        return(-1);

    htmlDocContentDumpOutput(out, cur, NULL);

    /* Closing the buffer yields the function's result. */
    return(xmlOutputBufferClose(out));
}
/**
 * htmlSaveFile:
 * @filename:  the filename (or URL)
 * @cur:  the document
 *
 * Dump an HTML document to a file. If @filename is "-" the stdout file is
 * used.
 *
 * The output encoder is chosen from the encoding declared in the
 * document's meta tags: none for UTF-8, the matching handler for any
 * other declared encoding (XML_SAVE_UNKNOWN_ENCODING is reported if it
 * cannot be found), and "HTML" with an "ascii" fallback when no encoding
 * is declared. The document's compression setting is passed on when the
 * output is opened.
 *
 * returns: the number of bytes written or -1 in case of failure,
 * including failure to create the output for @filename.
 */
int
htmlSaveFile(const char *filename, xmlDocPtr cur) {
    xmlOutputBufferPtr buf;
    xmlCharEncodingHandlerPtr handler = NULL;
    const char *encoding;
    int ret;

    if ((cur == NULL) || (filename == NULL))
        return(-1);

    xmlInitParser();

    encoding = (const char *) htmlGetMetaEncoding(cur);
    if (encoding != NULL) {
        xmlCharEncoding enc;

        enc = xmlParseCharEncoding(encoding);
        if (enc != XML_CHAR_ENCODING_UTF8) {
            handler = xmlFindCharEncodingHandler(encoding);
            if (handler == NULL)
                htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
        }
    } else {
        /*
         * Fallback to HTML or ASCII when the encoding is unspecified
         */
        handler = xmlFindCharEncodingHandler("HTML");
        if (handler == NULL)
            handler = xmlFindCharEncodingHandler("ascii");
    }

    /*
     * Open the output on @filename. A failure here is reported as -1,
     * as documented, so it cannot be mistaken for a successful write
     * of zero bytes.
     */
    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
    if (buf == NULL)
        return(-1);

    htmlDocContentDumpOutput(buf, cur, NULL);

    ret = xmlOutputBufferClose(buf);
    return(ret);
}
/**
2001-06-13 17:48:46 +04:00
* htmlSaveFileFormat :
2001-02-23 20:55:21 +03:00
* @ filename : the filename
* @ cur : the document
2001-06-13 17:48:46 +04:00
* @ format : should formatting spaces been added
* @ encoding : the document encoding
2001-02-23 20:55:21 +03:00
*
* Dump an HTML document to a file using a given encoding .
2012-09-11 09:26:36 +04:00
*
2001-02-23 20:55:21 +03:00
* returns : the number of byte written or - 1 in case of failure .
*/
int
2001-06-13 17:48:46 +04:00
htmlSaveFileFormat ( const char * filename , xmlDocPtr cur ,
const char * encoding , int format ) {
2001-02-23 20:55:21 +03:00
xmlOutputBufferPtr buf ;
xmlCharEncodingHandlerPtr handler = NULL ;
int ret ;
2004-11-02 17:52:23 +03:00
if ( ( cur = = NULL ) | | ( filename = = NULL ) )
return ( - 1 ) ;
2009-08-13 01:03:23 +04:00
2003-08-08 18:00:28 +04:00
xmlInitParser ( ) ;
2001-02-23 20:55:21 +03:00
if ( encoding ! = NULL ) {
xmlCharEncoding enc ;
enc = xmlParseCharEncoding ( encoding ) ;
2018-10-13 16:23:35 +03:00
if ( enc ! = XML_CHAR_ENCODING_UTF8 ) {
2001-02-23 20:55:21 +03:00
handler = xmlFindCharEncodingHandler ( encoding ) ;
if ( handler = = NULL )
2011-05-16 12:03:50 +04:00
htmlSaveErr ( XML_SAVE_UNKNOWN_ENCODING , NULL , encoding ) ;
2001-02-23 20:55:21 +03:00
}
2009-08-13 01:03:23 +04:00
htmlSetMetaEncoding ( cur , ( const xmlChar * ) encoding ) ;
2001-04-02 19:16:19 +04:00
} else {
htmlSetMetaEncoding ( cur , ( const xmlChar * ) " UTF-8 " ) ;
2001-02-23 20:55:21 +03:00
2018-10-13 17:12:14 +03:00
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if ( handler = = NULL )
handler = xmlFindCharEncodingHandler ( " HTML " ) ;
if ( handler = = NULL )
handler = xmlFindCharEncodingHandler ( " ascii " ) ;
}
2001-02-23 20:55:21 +03:00
2012-09-11 09:26:36 +04:00
/*
2001-02-23 20:55:21 +03:00
* save the content to a temp buffer .
*/
buf = xmlOutputBufferCreateFilename ( filename , handler , 0 ) ;
if ( buf = = NULL ) return ( 0 ) ;
2001-06-13 17:48:46 +04:00
htmlDocContentDumpFormatOutput ( buf , cur , encoding , format ) ;
2001-02-23 20:55:21 +03:00
ret = xmlOutputBufferClose ( buf ) ;
return ( ret ) ;
}
2001-06-13 17:48:46 +04:00
/**
 * htmlSaveFileEnc:
 * @filename:  the filename
 * @cur:  the document
 * @encoding:  the document encoding
 *
 * Dump an HTML document to a file using a given encoding, with
 * formatting returns/spaces added. Shorthand for
 * htmlSaveFileFormat() with formatting enabled.
 *
 * returns: whatever htmlSaveFileFormat() returns for the same arguments.
 */
int
htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
    const int withFormatting = 1;
    int written;

    written = htmlSaveFileFormat(filename, cur, encoding, withFormatting);
    return(written);
}
2003-09-29 17:20:24 +04:00
# endif /* LIBXML_OUTPUT_ENABLED */
2002-08-12 17:27:28 +04:00
2001-02-23 20:55:21 +03:00
# endif /* LIBXML_HTML_ENABLED */