2001-02-23 17:55:21 +00:00
/*
2001-12-31 16:16:02 +00:00
* HTMLtree . c : implementation of access function for an HTML tree .
2001-02-23 17:55:21 +00:00
*
* See Copyright for the status of this software .
*
2001-06-24 12:13:24 +00:00
* daniel @ veillard . com
2001-02-23 17:55:21 +00:00
*/
2002-03-18 19:37:11 +00:00
# define IN_LIBXML
2001-04-21 16:57:29 +00:00
# include "libxml.h"
2001-02-23 17:55:21 +00:00
# ifdef LIBXML_HTML_ENABLED
2002-11-27 19:39:27 +00:00
# include <string.h> /* for memset() only ! */
2001-02-23 17:55:21 +00:00
# include <ctype.h>
# include <stdlib.h>
# include <libxml/xmlmemory.h>
# include <libxml/HTMLparser.h>
# include <libxml/HTMLtree.h>
# include <libxml/entities.h>
# include <libxml/xmlerror.h>
# include <libxml/parserInternals.h>
2002-04-14 22:00:22 +00:00
# include <libxml/uri.h>
2001-02-23 17:55:21 +00:00
2022-08-26 01:22:33 +02:00
# include "private/buf.h"
# include "private/error.h"
# include "private/io.h"
# include "private/save.h"
2012-07-16 14:58:02 +08:00
2001-02-23 17:55:21 +00:00
/************************************************************************
* *
2012-09-11 13:26:36 +08:00
* Getting / Setting encoding meta tags *
2001-02-23 17:55:21 +00:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * htmlGetMetaEncoding:
 * @doc:  the document
 *
 * Look up the encoding declared by a META element carrying both an
 * http-equiv="Content-Type" attribute and a content attribute.
 *
 * The search starts at the document children: it descends through "html"
 * and "head" when they are present, and otherwise scans the siblings of
 * the first "meta" element it meets.
 *
 * Returns a pointer into the content attribute value, positioned after
 * "charset=" and any following spaces or tabs, or NULL when the document
 * is NULL or no such declaration is found.
 */
const xmlChar *
htmlGetMetaEncoding(htmlDocPtr doc) {
    static const char * const compact[] = { "charset=", "Charset=", "CHARSET=" };
    static const char * const spaced[] = { "charset =", "Charset =", "CHARSET =" };
    htmlNodePtr node;
    const xmlChar *content = NULL;
    const xmlChar *charset = NULL;
    int i;

    if (doc == NULL)
        return(NULL);

    /* Top level: look for html, or directly for head / meta. */
    for (node = doc->children; node != NULL; node = node->next) {
        if ((node->type != XML_ELEMENT_NODE) || (node->name == NULL))
            continue;
        if (xmlStrEqual(node->name, BAD_CAST "html"))
            break;
        if (xmlStrEqual(node->name, BAD_CAST "head"))
            goto scan_head;
        if (xmlStrEqual(node->name, BAD_CAST "meta"))
            goto scan_meta;
    }
    if (node == NULL)
        return(NULL);

    /* Children of html: look for head, or directly for meta. */
    for (node = node->children; node != NULL; node = node->next) {
        if ((node->type != XML_ELEMENT_NODE) || (node->name == NULL))
            continue;
        if (xmlStrEqual(node->name, BAD_CAST "head"))
            break;
        if (xmlStrEqual(node->name, BAD_CAST "meta"))
            goto scan_meta;
    }
    if (node == NULL)
        return(NULL);

scan_head:
    node = node->children;

scan_meta:
    /* Inspect every meta element among the remaining siblings. */
    for (; node != NULL; node = node->next) {
        xmlAttrPtr attr;
        int isContentType = 0;

        if ((node->type != XML_ELEMENT_NODE) || (node->name == NULL))
            continue;
        if (!xmlStrEqual(node->name, BAD_CAST "meta"))
            continue;

        content = NULL;
        for (attr = node->properties; attr != NULL; attr = attr->next) {
            const xmlChar *value;

            /* Only attributes holding a single text node are considered. */
            if ((attr->children == NULL) ||
                (attr->children->type != XML_TEXT_NODE) ||
                (attr->children->next != NULL))
                continue;

            value = attr->children->content;
            if ((!xmlStrcasecmp(attr->name, BAD_CAST "http-equiv")) &&
                (!xmlStrcasecmp(value, BAD_CAST "Content-Type")))
                isContentType = 1;
            else if ((value != NULL) &&
                     (!xmlStrcasecmp(attr->name, BAD_CAST "content")))
                content = value;

            if ((isContentType != 0) && (content != NULL))
                goto found_content;
        }
    }
    return(NULL);

found_content:
    /* First try the three spellings without a blank before '='. */
    for (i = 0; (i < 3) && (charset == NULL); i++)
        charset = xmlStrstr(content, BAD_CAST compact[i]);

    if (charset != NULL) {
        charset += 8;   /* length of "charset=" */
    } else {
        for (i = 0; (i < 3) && (charset == NULL); i++)
            charset = xmlStrstr(content, BAD_CAST spaced[i]);
        if (charset != NULL)
            charset += 9;   /* length of "charset =" */
    }

    if (charset == NULL)
        return(NULL);

    while ((*charset == ' ') || (*charset == '\t'))
        charset++;
    return(charset);
}
/*
 * htmlMetaContentHasEncoding:
 * @content:  value of a META content attribute
 * @encoding:  the encoding name looked for
 *
 * Check whether @content already names @encoding. The comparison is case
 * insensitive, and a match only counts when it is not immediately followed
 * by another charset-name character, so that "iso-8859-1" is not found
 * inside "iso-8859-15".
 *
 * Returns 1 if the encoding is present, 0 otherwise.
 */
static int
htmlMetaContentHasEncoding(const xmlChar *content, const xmlChar *encoding) {
    const xmlChar *hit;
    int len = xmlStrlen(encoding);

    /* Nothing to delimit: fall back to the plain substring search. */
    if (len <= 0)
        return(xmlStrcasestr(content, encoding) != NULL);

    for (hit = xmlStrcasestr(content, encoding);
         hit != NULL;
         hit = xmlStrcasestr(hit + 1, encoding)) {
        xmlChar next = hit[len];

        if (((next >= 'a') && (next <= 'z')) ||
            ((next >= 'A') && (next <= 'Z')) ||
            ((next >= '0') && (next <= '9')) ||
            (next == '-') || (next == '_') || (next == '.') ||
            (next == ':') || (next == '+'))
            continue;   /* only a prefix of a longer name */
        return(1);
    }
    return(0);
}

/**
 * htmlSetMetaEncoding:
 * @doc:  the document
 * @encoding:  the encoding string, or NULL to remove the declaration
 *
 * Sets the current encoding in the Meta tags
 * NOTE: this will not change the document content encoding, just
 *       the META flag associated.
 *
 * The first META element carrying both http-equiv="Content-Type" and a
 * content attribute is updated, or removed when @encoding is NULL. If none
 * exists and a head (or an enclosing element of a meta) was located, a new
 * META element is inserted as its first child. The generated content value
 * is limited to the size of a 100 byte buffer; longer values are truncated.
 *
 * Returns 0 in case of success and -1 in case of error (NULL document,
 * "html" pseudo encoding, no html/head/meta element found, or a failure
 * to create or update the META element).
 */
int
htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
    htmlNodePtr cur, meta = NULL, head = NULL;
    const xmlChar *content = NULL;
    char newcontent[100];

    newcontent[0] = 0;

    if (doc == NULL)
        return(-1);

    /* html isn't a real encoding it's just libxml2 way to get entities */
    if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
        return(-1);

    if (encoding != NULL) {
        snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
                 (char *) encoding);
        newcontent[sizeof(newcontent) - 1] = 0;
    }

    /*
     * Search the html
     */
    for (cur = doc->children; cur != NULL; cur = cur->next) {
        if ((cur->type != XML_ELEMENT_NODE) || (cur->name == NULL))
            continue;
        if (xmlStrcasecmp(cur->name, BAD_CAST "html") == 0)
            break;
        if (xmlStrcasecmp(cur->name, BAD_CAST "head") == 0)
            goto found_head;
        if (xmlStrcasecmp(cur->name, BAD_CAST "meta") == 0)
            goto found_meta;
    }
    if (cur == NULL)
        return(-1);

    /*
     * Search the head
     */
    for (cur = cur->children; cur != NULL; cur = cur->next) {
        if ((cur->type != XML_ELEMENT_NODE) || (cur->name == NULL))
            continue;
        if (xmlStrcasecmp(cur->name, BAD_CAST "head") == 0)
            break;
        if (xmlStrcasecmp(cur->name, BAD_CAST "meta") == 0) {
            /* No head: a new meta would be inserted under the parent. */
            head = cur->parent;
            goto found_meta;
        }
    }
    if (cur == NULL)
        return(-1);

found_head:
    head = cur;
    if (cur->children == NULL)
        goto create;
    cur = cur->children;

found_meta:
    /*
     * Search the remaining siblings for the first meta element carrying
     * encoding information
     */
    for (; cur != NULL; cur = cur->next) {
        xmlAttrPtr attr;
        int http = 0;

        if ((cur->type != XML_ELEMENT_NODE) || (cur->name == NULL))
            continue;
        if (xmlStrcasecmp(cur->name, BAD_CAST "meta") != 0)
            continue;

        content = NULL;
        for (attr = cur->properties; attr != NULL; attr = attr->next) {
            const xmlChar *value;

            /* Only attributes holding a single text node are considered. */
            if ((attr->children == NULL) ||
                (attr->children->type != XML_TEXT_NODE) ||
                (attr->children->next != NULL))
                continue;

            value = attr->children->content;
            if ((!xmlStrcasecmp(attr->name, BAD_CAST "http-equiv")) &&
                (!xmlStrcasecmp(value, BAD_CAST "Content-Type")))
                http = 1;
            else if ((value != NULL) &&
                     (!xmlStrcasecmp(attr->name, BAD_CAST "content")))
                content = value;

            if ((http != 0) && (content != NULL))
                break;
        }
        if ((http != 0) && (content != NULL)) {
            meta = cur;
            break;
        }
    }

create:
    if (meta == NULL) {
        if ((encoding != NULL) && (head != NULL)) {
            /*
             * Create a new Meta element with the right attributes. It is
             * fully built before being linked so that a failure leaves
             * the document untouched.
             */
            meta = xmlNewDocNode(doc, NULL, BAD_CAST "meta", NULL);
            if (meta == NULL)
                return(-1);
            if ((xmlNewProp(meta, BAD_CAST "http-equiv",
                            BAD_CAST "Content-Type") == NULL) ||
                (xmlNewProp(meta, BAD_CAST "content",
                            BAD_CAST newcontent) == NULL)) {
                xmlFreeNode(meta);
                return(-1);
            }
            if (head->children == NULL)
                xmlAddChild(head, meta);
            else
                xmlAddPrevSibling(head->children, meta);
        }
    } else {
        /* remove the meta tag if NULL is passed */
        if (encoding == NULL) {
            xmlUnlinkNode(meta);
            xmlFreeNode(meta);
        }
        /* change the document only if there is a real encoding change */
        else if (!htmlMetaContentHasEncoding(content, encoding)) {
            if (xmlSetProp(meta, BAD_CAST "content",
                           BAD_CAST newcontent) == NULL)
                return(-1);
        }
    }

    return(0);
}
2002-08-12 13:27:28 +00:00
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 */
static const char * const htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
/**
 * htmlIsBooleanAttr:
 * @name:  the name of the attribute to check
 *
 * Determine if a given attribute is a boolean attribute, by comparing
 * @name case-insensitively against each entry of htmlBooleanAttrs.
 *
 * returns: false if the attribute is not boolean, true otherwise.
 */
int
htmlIsBooleanAttr(const xmlChar *name)
{
    const char * const *entry;

    for (entry = htmlBooleanAttrs; *entry != NULL; entry++) {
        if (xmlStrcasecmp((const xmlChar *) *entry, name) == 0)
            return 1;
    }
    return 0;
}
2003-09-29 13:20:24 +00:00
# ifdef LIBXML_OUTPUT_ENABLED
2003-10-09 13:14:55 +00:00
/************************************************************************
* *
2012-09-11 13:26:36 +08:00
* Output error handlers *
2003-10-09 13:14:55 +00:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * htmlSaveErr:
 * @code:  the error number
 * @node:  the location of the error.
 * @extra:  extra information
 *
 * Report an error of the XML_FROM_OUTPUT domain at level XML_ERR_ERROR.
 * The message is selected from @code; @extra is passed along both as the
 * first string argument and as the message parameter. If raising the
 * error returns a negative value, a memory error is raised instead.
 */
static void
htmlSaveErr(int code, xmlNodePtr node, const char *extra)
{
    const char *format;

    if (code == XML_SAVE_NOT_UTF8)
        format = "string is not in UTF-8\n";
    else if (code == XML_SAVE_CHAR_INVALID)
        format = "invalid character value\n";
    else if (code == XML_SAVE_UNKNOWN_ENCODING)
        format = "unknown encoding %s\n";
    else if (code == XML_SAVE_NO_DOCTYPE)
        format = "HTML has no DOCTYPE\n";
    else
        format = "unexpected error number\n";

    if (__xmlRaiseError(NULL, NULL, NULL, NULL, node,
                        XML_FROM_OUTPUT, code, XML_ERR_ERROR, NULL, 0,
                        extra, NULL, NULL, 0, 0,
                        format, extra) < 0)
        xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_OUTPUT, NULL);
}
2001-02-23 17:55:21 +00:00
/************************************************************************
* *
2012-09-11 13:26:36 +08:00
* Dumping HTML tree content to a simple buffer *
2001-02-23 17:55:21 +00:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2023-12-10 19:07:32 +01:00
static xmlCharEncodingHandler *
htmlFindOutputEncoder ( const char * encoding ) {
xmlCharEncodingHandler * handler = NULL ;
if ( encoding ! = NULL ) {
xmlCharEncoding enc ;
enc = xmlParseCharEncoding ( encoding ) ;
if ( enc ! = XML_CHAR_ENCODING_UTF8 ) {
2024-01-02 18:33:57 +01:00
xmlOpenCharEncodingHandler ( encoding , /* output */ 1 , & handler ) ;
2023-12-10 19:07:32 +01:00
if ( handler = = NULL )
htmlSaveErr ( XML_SAVE_UNKNOWN_ENCODING , NULL , encoding ) ;
}
} else {
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if ( handler = = NULL )
2024-01-02 18:33:57 +01:00
xmlOpenCharEncodingHandler ( " HTML " , /* output */ 1 , & handler ) ;
2023-12-10 19:07:32 +01:00
if ( handler = = NULL )
2024-01-02 18:33:57 +01:00
xmlOpenCharEncodingHandler ( " ascii " , /* output */ 1 , & handler ) ;
2023-12-10 19:07:32 +01:00
}
return ( handler ) ;
}
2001-02-23 17:55:21 +00:00
/**
2012-07-16 14:58:02 +08:00
* htmlBufNodeDumpFormat :
* @ buf : the xmlBufPtr output
2001-02-23 17:55:21 +00:00
* @ doc : the document
* @ cur : the current node
2001-06-13 13:48:46 +00:00
* @ format : should formatting spaces been added
2001-02-23 17:55:21 +00:00
*
* Dump an HTML node , recursive behaviour , children are printed too .
2002-11-27 19:39:27 +00:00
*
* Returns the number of byte written or - 1 in case of error
2001-02-23 17:55:21 +00:00
*/
2012-07-16 14:58:02 +08:00
static size_t
htmlBufNodeDumpFormat ( xmlBufPtr buf , xmlDocPtr doc , xmlNodePtr cur ,
2001-06-13 13:48:46 +00:00
int format ) {
2012-07-16 14:58:02 +08:00
size_t use ;
2024-03-05 18:06:02 +01:00
size_t ret ;
2002-11-27 19:39:27 +00:00
xmlOutputBufferPtr outbuf ;
2001-02-23 17:55:21 +00:00
if ( cur = = NULL ) {
2024-03-04 01:39:00 +01:00
return ( ( size_t ) - 1 ) ;
2002-11-27 19:39:27 +00:00
}
if ( buf = = NULL ) {
2024-03-04 01:39:00 +01:00
return ( ( size_t ) - 1 ) ;
2002-11-27 19:39:27 +00:00
}
outbuf = ( xmlOutputBufferPtr ) xmlMalloc ( sizeof ( xmlOutputBuffer ) ) ;
2023-12-18 21:30:22 +01:00
if ( outbuf = = NULL )
2024-03-04 01:39:00 +01:00
return ( ( size_t ) - 1 ) ;
2022-09-01 01:18:30 +02:00
memset ( outbuf , 0 , sizeof ( xmlOutputBuffer ) ) ;
2002-11-27 19:39:27 +00:00
outbuf - > buffer = buf ;
outbuf - > encoder = NULL ;
outbuf - > writecallback = NULL ;
outbuf - > closecallback = NULL ;
outbuf - > context = NULL ;
outbuf - > written = 0 ;
2012-07-16 14:58:02 +08:00
use = xmlBufUse ( buf ) ;
2002-11-27 19:39:27 +00:00
htmlNodeDumpFormatOutput ( outbuf , doc , cur , NULL , format ) ;
2024-03-05 18:06:02 +01:00
if ( outbuf - > error )
ret = ( size_t ) - 1 ;
else
ret = xmlBufUse ( buf ) - use ;
2002-11-27 19:39:27 +00:00
xmlFree ( outbuf ) ;
return ( ret ) ;
2001-02-23 17:55:21 +00:00
}
/**
2001-06-13 13:48:46 +00:00
* htmlNodeDump :
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the current node
*
* Dump an HTML node , recursive behaviour , children are printed too ,
* and formatting returns are added .
2002-11-27 19:39:27 +00:00
*
* Returns the number of byte written or - 1 in case of error
2001-06-13 13:48:46 +00:00
*/
2002-11-27 19:39:27 +00:00
int
2001-06-13 13:48:46 +00:00
htmlNodeDump ( xmlBufferPtr buf , xmlDocPtr doc , xmlNodePtr cur ) {
2012-07-16 14:58:02 +08:00
xmlBufPtr buffer ;
size_t ret ;
if ( ( buf = = NULL ) | | ( cur = = NULL ) )
return ( - 1 ) ;
2003-08-08 14:00:28 +00:00
xmlInitParser ( ) ;
2012-07-16 14:58:02 +08:00
buffer = xmlBufFromBuffer ( buf ) ;
if ( buffer = = NULL )
return ( - 1 ) ;
2024-03-06 14:14:33 +01:00
xmlBufSetAllocationScheme ( buffer , XML_BUFFER_ALLOC_DOUBLEIT ) ;
2012-07-16 14:58:02 +08:00
ret = htmlBufNodeDumpFormat ( buffer , doc , cur , 1 ) ;
2003-08-08 14:00:28 +00:00
2012-07-16 14:58:02 +08:00
xmlBufBackToBuffer ( buffer ) ;
if ( ret > INT_MAX )
return ( - 1 ) ;
return ( ( int ) ret ) ;
2001-06-13 13:48:46 +00:00
}
/**
* htmlNodeDumpFileFormat :
2001-02-23 17:55:21 +00:00
* @ out : the FILE pointer
* @ doc : the document
* @ cur : the current node
2001-06-13 13:48:46 +00:00
* @ encoding : the document encoding
* @ format : should formatting spaces been added
2001-02-23 17:55:21 +00:00
*
* Dump an HTML node , recursive behaviour , children are printed too .
2001-06-13 13:48:46 +00:00
*
2001-06-14 11:11:59 +00:00
* TODO : if encoding = = NULL try to save in the doc encoding
*
* returns : the number of byte written or - 1 in case of failure .
2001-02-23 17:55:21 +00:00
*/
2001-06-14 11:11:59 +00:00
int
htmlNodeDumpFileFormat ( FILE * out , xmlDocPtr doc ,
xmlNodePtr cur , const char * encoding , int format ) {
xmlOutputBufferPtr buf ;
2023-12-10 19:07:32 +01:00
xmlCharEncodingHandlerPtr handler ;
2001-06-14 11:11:59 +00:00
int ret ;
2003-08-08 14:00:28 +00:00
xmlInitParser ( ) ;
2012-09-11 13:26:36 +08:00
/*
2001-06-14 11:11:59 +00:00
* save the content to a temp buffer .
*/
2023-12-10 19:07:32 +01:00
handler = htmlFindOutputEncoder ( encoding ) ;
2001-06-14 11:11:59 +00:00
buf = xmlOutputBufferCreateFile ( out , handler ) ;
2024-05-06 11:36:25 +02:00
if ( buf = = NULL ) {
xmlCharEncCloseFunc ( handler ) ;
return ( 0 ) ;
}
2001-06-14 11:11:59 +00:00
2021-02-07 13:38:01 +01:00
htmlNodeDumpFormatOutput ( buf , doc , cur , NULL , format ) ;
2001-06-14 11:11:59 +00:00
ret = xmlOutputBufferClose ( buf ) ;
return ( ret ) ;
2001-02-23 17:55:21 +00:00
}
2001-06-13 13:48:46 +00:00
/**
 * htmlNodeDumpFile:
 * @out:  the FILE pointer
 * @doc:  the document
 * @cur:  the current node
 *
 * Dump an HTML node, recursive behaviour, children are printed too,
 * and formatting returns are added. This is htmlNodeDumpFileFormat()
 * with no explicit encoding and formatting enabled; its result is
 * discarded.
 */
void
htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
    const char *noEncoding = NULL;

    (void) htmlNodeDumpFileFormat(out, doc, cur, noEncoding, 1);
}
2001-02-23 17:55:21 +00:00
/**
2005-12-20 15:55:14 +00:00
* htmlDocDumpMemoryFormat :
2001-02-23 17:55:21 +00:00
* @ cur : the document
* @ mem : OUT : the memory pointer
2001-05-30 18:32:34 +00:00
* @ size : OUT : the memory length
2005-12-20 15:55:14 +00:00
* @ format : should formatting spaces been added
2001-02-23 17:55:21 +00:00
*
* Dump an HTML document in memory and return the xmlChar * and it ' s size .
* It ' s up to the caller to free the memory .
*/
void
2005-12-20 15:55:14 +00:00
htmlDocDumpMemoryFormat ( xmlDocPtr cur , xmlChar * * mem , int * size , int format ) {
2001-05-30 18:32:34 +00:00
xmlOutputBufferPtr buf ;
xmlCharEncodingHandlerPtr handler = NULL ;
const char * encoding ;
2001-02-23 17:55:21 +00:00
2003-08-08 14:00:28 +00:00
xmlInitParser ( ) ;
2004-11-06 19:24:28 +00:00
if ( ( mem = = NULL ) | | ( size = = NULL ) )
return ;
2024-03-05 18:06:02 +01:00
* mem = NULL ;
* size = 0 ;
if ( cur = = NULL )
2001-02-23 17:55:21 +00:00
return ;
2001-05-30 18:32:34 +00:00
encoding = ( const char * ) htmlGetMetaEncoding ( cur ) ;
2023-12-10 19:07:32 +01:00
handler = htmlFindOutputEncoder ( encoding ) ;
2008-09-01 13:08:57 +00:00
buf = xmlAllocOutputBufferInternal ( handler ) ;
2024-05-06 11:36:25 +02:00
if ( buf = = NULL ) {
xmlCharEncCloseFunc ( handler ) ;
2001-02-23 17:55:21 +00:00
return ;
2024-05-06 11:36:25 +02:00
}
2001-05-30 18:32:34 +00:00
2011-05-16 16:03:50 +08:00
htmlDocContentDumpFormatOutput ( buf , cur , NULL , format ) ;
2005-12-20 15:55:14 +00:00
2001-05-30 18:32:34 +00:00
xmlOutputBufferFlush ( buf ) ;
2024-03-05 18:06:02 +01:00
if ( ! buf - > error ) {
if ( buf - > conv ! = NULL ) {
* size = xmlBufUse ( buf - > conv ) ;
* mem = xmlStrndup ( xmlBufContent ( buf - > conv ) , * size ) ;
} else {
* size = xmlBufUse ( buf - > buffer ) ;
* mem = xmlStrndup ( xmlBufContent ( buf - > buffer ) , * size ) ;
}
2001-05-30 18:32:34 +00:00
}
2024-03-05 18:06:02 +01:00
xmlOutputBufferClose ( buf ) ;
2001-02-23 17:55:21 +00:00
}
2005-12-20 15:55:14 +00:00
/**
 * htmlDocDumpMemory:
 * @cur:  the document
 * @mem:  OUT: the memory pointer
 * @size:  OUT: the memory length
 *
 * Dump an HTML document in memory and return the xmlChar * and it's size.
 * It's up to the caller to free the memory. This is
 * htmlDocDumpMemoryFormat() with formatting enabled.
 */
void
htmlDocDumpMemory(xmlDocPtr cur, xmlChar **mem, int *size) {
    const int withFormatting = 1;

    htmlDocDumpMemoryFormat(cur, mem, size, withFormatting);
}
2001-02-23 17:55:21 +00:00
/************************************************************************
* *
2012-09-11 13:26:36 +08:00
* Dumping HTML tree content to an I / O output buffer *
2001-02-23 17:55:21 +00:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * htmlDtdDumpOutput:
 * @buf:  the HTML buffer output
 * @doc:  the document
 * @encoding:  the encoding string (unused)
 *
 * TODO: check whether encoding is needed
 *
 * Dump the HTML document DTD, if any.
 *
 * The internal subset of @doc is written as a DOCTYPE declaration: its
 * name, then either the PUBLIC identifier (followed by the system
 * identifier when there is one) or the SYSTEM identifier alone. When the
 * document has no internal subset an XML_SAVE_NO_DOCTYPE error is
 * reported and nothing is written.
 */
static void
2001-03-24 17:00:36 +00:00
htmlDtdDumpOutput ( xmlOutputBufferPtr buf , xmlDocPtr doc ,
2001-03-26 16:28:29 +00:00
const char * encoding ATTRIBUTE_UNUSED ) {
2001-02-23 17:55:21 +00:00
xmlDtdPtr cur = doc - > intSubset ;
/* No internal subset: report it and emit nothing. */
if ( cur = = NULL ) {
2003-10-09 13:14:55 +00:00
htmlSaveErr ( XML_SAVE_NO_DOCTYPE , ( xmlNodePtr ) doc , NULL ) ;
2001-02-23 17:55:21 +00:00
return ;
}
xmlOutputBufferWriteString ( buf , " <!DOCTYPE " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
if ( cur - > ExternalID ! = NULL ) {
/* Public identifier, optionally followed by the system identifier. */
xmlOutputBufferWriteString ( buf , " PUBLIC " ) ;
2024-02-16 15:42:38 +01:00
xmlOutputBufferWriteQuotedString ( buf , cur - > ExternalID ) ;
2001-02-23 17:55:21 +00:00
if ( cur - > SystemID ! = NULL ) {
xmlOutputBufferWriteString ( buf , " " ) ;
2024-02-16 15:42:38 +01:00
xmlOutputBufferWriteQuotedString ( buf , cur - > SystemID ) ;
2012-09-11 13:26:36 +08:00
}
2015-04-03 22:52:36 +08:00
/*
 * System identifier only. It is left out when it compares equal to
 * "about:legacy-compat" (xmlStrcmp() returns 0 in that case).
 */
} else if ( cur - > SystemID ! = NULL & &
xmlStrcmp ( cur - > SystemID , BAD_CAST " about:legacy-compat " ) ) {
2001-02-23 17:55:21 +00:00
xmlOutputBufferWriteString ( buf , " SYSTEM " ) ;
2024-02-16 15:42:38 +01:00
xmlOutputBufferWriteQuotedString ( buf , cur - > SystemID ) ;
2001-02-23 17:55:21 +00:00
}
/* Close the declaration. */
xmlOutputBufferWriteString ( buf , " > \n " ) ;
}
/**
 * htmlAttrDumpOutput:
 * @buf:  the HTML buffer output
 * @doc:  the document
 * @cur:  the attribute pointer
 *
 * Dump an HTML attribute
 *
 * The attribute name is written, prefixed by its namespace prefix when it
 * has one. A value is written only when the attribute has children and is
 * not a boolean attribute according to htmlIsBooleanAttr(). Failure to
 * build the value string, or to escape it, is recorded in @buf->error as
 * XML_ERR_NO_MEMORY.
 */
static void
2021-02-07 13:38:01 +01:00
htmlAttrDumpOutput ( xmlOutputBufferPtr buf , xmlDocPtr doc , xmlAttrPtr cur ) {
2001-02-23 17:55:21 +00:00
xmlChar * value ;
2001-06-13 07:45:41 +00:00
/*
 * The html output method should not escape a & character
 * occurring in an attribute value immediately followed by
 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
 * This is implemented in xmlEncodeEntitiesReentrant
 */
2001-02-23 17:55:21 +00:00
if ( cur = = NULL ) {
return ;
}
/* Separator in front of the attribute, then the optional prefix and name. */
xmlOutputBufferWriteString ( buf , " " ) ;
2003-09-15 04:58:14 +00:00
if ( ( cur - > ns ! = NULL ) & & ( cur - > ns - > prefix ! = NULL ) ) {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > ns - > prefix ) ;
xmlOutputBufferWriteString ( buf , " : " ) ;
}
2001-02-23 17:55:21 +00:00
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
2002-08-12 13:27:28 +00:00
/* Boolean attributes and attributes without children get no value. */
if ( ( cur - > children ! = NULL ) & & ( ! htmlIsBooleanAttr ( cur - > name ) ) ) {
2001-02-23 17:55:21 +00:00
value = xmlNodeListGetString ( doc , cur - > children , 0 ) ;
if ( value ) {
xmlOutputBufferWriteString ( buf , " = " ) ;
2003-03-27 14:08:24 +00:00
/*
 * URI-valued attributes: href, action, src, and name when the parent
 * element is named "a". This applies only when neither the attribute
 * nor its parent element is in a namespace.
 */
if ( ( cur - > ns = = NULL ) & & ( cur - > parent ! = NULL ) & &
( cur - > parent - > ns = = NULL ) & &
( ( ! xmlStrcasecmp ( cur - > name , BAD_CAST " href " ) ) | |
( ! xmlStrcasecmp ( cur - > name , BAD_CAST " action " ) ) | |
2005-03-29 20:30:17 +00:00
( ! xmlStrcasecmp ( cur - > name , BAD_CAST " src " ) ) | |
( ( ! xmlStrcasecmp ( cur - > name , BAD_CAST " name " ) ) & &
( ! xmlStrcasecmp ( cur - > parent - > name , BAD_CAST " a " ) ) ) ) ) {
2020-08-15 18:32:29 +02:00
xmlChar * escaped ;
2002-04-14 22:00:22 +00:00
xmlChar * tmp = value ;
2003-10-18 16:20:14 +00:00
/* Skip leading blanks; tmp still points into value, freed below. */
while ( IS_BLANK_CH ( * tmp ) ) tmp + + ;
2002-04-14 22:00:22 +00:00
2020-08-15 18:32:29 +02:00
/*
 * Angle brackets are technically illegal in URIs, but they're
 * used in server side includes, for example. Curly brackets
 * are illegal as well and often used in templates.
 * Don't escape non-whitespace, printable ASCII chars for
 * improved interoperability. Only escape space, control
 * and non-ASCII chars.
 */
2022-11-14 21:02:15 +01:00
escaped = xmlURIEscapeStr ( tmp ,
BAD_CAST " \" #$%&+,/:;<=>?@[ \\ ]^`{|} " ) ;
2020-08-15 18:32:29 +02:00
if ( escaped ! = NULL ) {
2024-02-16 15:42:38 +01:00
xmlOutputBufferWriteQuotedString ( buf , escaped ) ;
2020-08-15 18:32:29 +02:00
xmlFree ( escaped ) ;
} else {
2023-12-10 19:07:32 +01:00
/* Escaping failed: flag the output buffer. */
buf - > error = XML_ERR_NO_MEMORY ;
2002-04-14 22:00:22 +00:00
}
} else {
2024-02-16 15:42:38 +01:00
/* Any other attribute: write the value as a quoted string. */
xmlOutputBufferWriteQuotedString ( buf , value ) ;
2002-04-14 22:00:22 +00:00
}
2001-02-23 17:55:21 +00:00
xmlFree ( value ) ;
} else {
2023-12-10 19:07:32 +01:00
/* The value string could not be built: flag the output buffer. */
buf - > error = XML_ERR_NO_MEMORY ;
2001-02-23 17:55:21 +00:00
}
}
}
/**
 * htmlNodeDumpFormatOutput:
 * @buf:  the HTML buffer output
 * @doc:  the document
 * @cur:  the current node
 * @encoding:  the encoding string (unused, except that it is forwarded
 *             to the recursive fallback call)
 * @format:  should formatting spaces been added
 *
 * Dump an HTML node, recursive behaviour, children are printed too.
 *
 * The subtree is walked iteratively. @root remembers the starting node
 * and @parent the node the walk descended from. Start tags, attributes
 * and leaf content are written on the way down; end tags are written
 * while climbing back up, until @root is reached again. Nothing is
 * written when @cur or @buf is NULL.
 */
void
2001-06-13 13:48:46 +00:00
htmlNodeDumpFormatOutput ( xmlOutputBufferPtr buf , xmlDocPtr doc ,
2021-02-07 13:38:01 +01:00
xmlNodePtr cur , const char * encoding ATTRIBUTE_UNUSED ,
int format ) {
2021-05-18 20:08:28 +02:00
xmlNodePtr root , parent ;
2020-07-28 02:42:37 +02:00
xmlAttrPtr attr ;
2001-08-16 23:26:59 +00:00
const htmlElemDesc * info ;
2001-02-23 17:55:21 +00:00
2003-08-08 14:00:28 +00:00
xmlInitParser ( ) ;
2004-11-05 10:03:46 +00:00
if ( ( cur = = NULL ) | | ( buf = = NULL ) ) {
2001-02-23 17:55:21 +00:00
return ;
}
2020-07-28 02:42:37 +02:00
root = cur ;
2021-05-18 20:08:28 +02:00
parent = cur - > parent ;
2020-07-28 02:42:37 +02:00
/*
 * Each iteration handles one node. A "continue" inside the switch means
 * the walk descended into the first child; a "break" falls through to
 * the sibling/ancestor loop at the bottom.
 */
while ( 1 ) {
switch ( cur - > type ) {
case XML_HTML_DOCUMENT_NODE :
case XML_DOCUMENT_NODE :
/* Document: write the DOCTYPE if there is one, then descend. */
if ( ( ( xmlDocPtr ) cur ) - > intSubset ! = NULL ) {
htmlDtdDumpOutput ( buf , ( xmlDocPtr ) cur , NULL ) ;
}
2021-06-07 15:09:53 +02:00
if ( cur - > children ! = NULL ) {
/* Always validate cur->parent when descending. */
if ( cur - > parent = = parent ) {
parent = cur ;
cur = cur - > children ;
continue ;
}
} else {
xmlOutputBufferWriteString ( buf , " \n " ) ;
2020-07-28 02:42:37 +02:00
}
break ;
2001-02-23 17:55:21 +00:00
2020-07-28 02:42:37 +02:00
case XML_ELEMENT_NODE :
2021-05-18 20:08:28 +02:00
/*
 * Some users like lxml are known to pass nodes with a corrupted
 * tree structure. Fall back to a recursive call to handle this
 * case.
 */
if ( ( cur - > parent ! = parent ) & & ( cur - > children ! = NULL ) ) {
htmlNodeDumpFormatOutput ( buf , doc , cur , encoding , format ) ;
break ;
}
2020-07-28 02:42:37 +02:00
/*
 * Get specific HTML info for that node. Elements in a namespace
 * are not looked up and are handled without element description.
 */
if ( cur - > ns = = NULL )
info = htmlTagLookup ( cur - > name ) ;
else
info = NULL ;
/* Start tag: optional prefix, name, namespace declarations, attributes. */
xmlOutputBufferWriteString ( buf , " < " ) ;
2003-04-10 21:40:49 +00:00
if ( ( cur - > ns ! = NULL ) & & ( cur - > ns - > prefix ! = NULL ) ) {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > ns - > prefix ) ;
xmlOutputBufferWriteString ( buf , " : " ) ;
}
2020-07-28 02:42:37 +02:00
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
if ( cur - > nsDef )
xmlNsListDumpOutput ( buf , cur - > nsDef ) ;
attr = cur - > properties ;
while ( attr ! = NULL ) {
2021-02-07 13:38:01 +01:00
htmlAttrDumpOutput ( buf , doc , attr ) ;
2020-07-28 02:42:37 +02:00
attr = attr - > next ;
}
/* Elements described as empty are closed without an end tag. */
if ( ( info ! = NULL ) & & ( info - > empty ) ) {
xmlOutputBufferWriteString ( buf , " > " ) ;
} else if ( cur - > children = = NULL ) {
/*
 * Childless element: the end tag is left out when the description
 * has saveEndTag set and the element is neither html nor body;
 * otherwise an explicit end tag is written right away.
 */
if ( ( info ! = NULL ) & & ( info - > saveEndTag ! = 0 ) & &
( xmlStrcmp ( BAD_CAST info - > name , BAD_CAST " html " ) ) & &
( xmlStrcmp ( BAD_CAST info - > name , BAD_CAST " body " ) ) ) {
xmlOutputBufferWriteString ( buf , " > " ) ;
} else {
xmlOutputBufferWriteString ( buf , " ></ " ) ;
if ( ( cur - > ns ! = NULL ) & & ( cur - > ns - > prefix ! = NULL ) ) {
xmlOutputBufferWriteString ( buf ,
( const char * ) cur - > ns - > prefix ) ;
xmlOutputBufferWriteString ( buf , " : " ) ;
}
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
xmlOutputBufferWriteString ( buf , " > " ) ;
}
} else {
/*
 * Element with children: close the start tag, optionally add a
 * newline after it, then descend. The end tag is written later by
 * the climbing loop at the bottom.
 */
xmlOutputBufferWriteString ( buf , " > " ) ;
if ( ( format ) & & ( info ! = NULL ) & & ( ! info - > isinline ) & &
( cur - > children - > type ! = HTML_TEXT_NODE ) & &
( cur - > children - > type ! = HTML_ENTITY_REF_NODE ) & &
( cur - > children ! = cur - > last ) & &
( cur - > name ! = NULL ) & &
( cur - > name [ 0 ] ! = ' p ' ) ) /* p, pre, param */
xmlOutputBufferWriteString ( buf , " \n " ) ;
2021-05-18 20:08:28 +02:00
parent = cur ;
2020-07-28 02:42:37 +02:00
cur = cur - > children ;
continue ;
}
/* Formatting newline after a non-inline element that has a next sibling. */
if ( ( format ) & & ( cur - > next ! = NULL ) & &
( info ! = NULL ) & & ( ! info - > isinline ) ) {
if ( ( cur - > next - > type ! = HTML_TEXT_NODE ) & &
( cur - > next - > type ! = HTML_ENTITY_REF_NODE ) & &
2021-05-18 20:08:28 +02:00
( parent ! = NULL ) & &
( parent - > name ! = NULL ) & &
( parent - > name [ 0 ] ! = ' p ' ) ) /* p, pre, param */
2020-07-28 02:42:37 +02:00
xmlOutputBufferWriteString ( buf , " \n " ) ;
}
break ;
case XML_ATTRIBUTE_NODE :
2021-02-07 13:38:01 +01:00
htmlAttrDumpOutput ( buf , doc , ( xmlAttrPtr ) cur ) ;
2020-07-28 02:42:37 +02:00
break ;
case HTML_TEXT_NODE :
if ( cur - > content = = NULL )
break ;
/*
 * Text is entity-encoded unless it is a no-encoding text node or
 * its parent is a script or style element, in which case it is
 * written as is.
 * NOTE(review): the comparison with xmlStringText looks subsumed by
 * the comparison with xmlStringTextNoenc, assuming the two are
 * distinct pointers -- confirm.
 */
if ( ( ( cur - > name = = ( const xmlChar * ) xmlStringText ) | |
( cur - > name ! = ( const xmlChar * ) xmlStringTextNoenc ) ) & &
2021-05-18 20:08:28 +02:00
( ( parent = = NULL ) | |
( ( xmlStrcasecmp ( parent - > name , BAD_CAST " script " ) ) & &
( xmlStrcasecmp ( parent - > name , BAD_CAST " style " ) ) ) ) ) {
2020-07-28 02:42:37 +02:00
xmlChar * buffer ;
buffer = xmlEncodeEntitiesReentrant ( doc , cur - > content ) ;
2023-12-10 19:07:32 +01:00
/* Encoding failed: flag the buffer and abandon the whole dump. */
if ( buffer = = NULL ) {
buf - > error = XML_ERR_NO_MEMORY ;
return ;
2020-07-28 02:42:37 +02:00
}
2023-12-10 19:07:32 +01:00
xmlOutputBufferWriteString ( buf , ( const char * ) buffer ) ;
xmlFree ( buffer ) ;
2020-07-28 02:42:37 +02:00
} else {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > content ) ;
}
break ;
case HTML_COMMENT_NODE :
/* Comments without content are skipped entirely. */
if ( cur - > content ! = NULL ) {
xmlOutputBufferWriteString ( buf , " <!-- " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > content ) ;
xmlOutputBufferWriteString ( buf , " --> " ) ;
}
break ;
case HTML_PI_NODE :
/* Processing instruction: target, optional content, closing ">". */
if ( cur - > name ! = NULL ) {
xmlOutputBufferWriteString ( buf , " <? " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
if ( cur - > content ! = NULL ) {
xmlOutputBufferWriteString ( buf , " " ) ;
xmlOutputBufferWriteString ( buf ,
( const char * ) cur - > content ) ;
}
xmlOutputBufferWriteString ( buf , " > " ) ;
}
break ;
case HTML_ENTITY_REF_NODE :
/* Entity reference: the name wrapped in "&" and ";". */
xmlOutputBufferWriteString ( buf , " & " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
xmlOutputBufferWriteString ( buf , " ; " ) ;
break ;
case HTML_PRESERVE_NODE :
/* Content is written without any escaping. */
if ( cur - > content ! = NULL ) {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > content ) ;
}
break ;
default :
break ;
}
/*
 * Move on: stop at the root, otherwise go to the next sibling, or
 * climb to the parent (writing its end tag) and try again from there.
 */
while ( 1 ) {
if ( cur = = root )
return ;
if ( cur - > next ! = NULL ) {
cur = cur - > next ;
break ;
}
2021-05-18 20:08:28 +02:00
cur = parent ;
/* cur->parent was validated when descending. */
parent = cur - > parent ;
2020-07-28 02:42:37 +02:00
if ( ( cur - > type = = XML_HTML_DOCUMENT_NODE ) | |
( cur - > type = = XML_DOCUMENT_NODE ) ) {
xmlOutputBufferWriteString ( buf , " \n " ) ;
} else {
/* The element description is only needed for formatting decisions here. */
if ( ( format ) & & ( cur - > ns = = NULL ) )
info = htmlTagLookup ( cur - > name ) ;
else
info = NULL ;
/*
 * Optional newline before the end tag.
 * NOTE(review): cur->last is dereferenced without a NULL check;
 * presumably safe because cur was reached by climbing up from one
 * of its children -- confirm for corrupted trees.
 */
if ( ( format ) & & ( info ! = NULL ) & & ( ! info - > isinline ) & &
( cur - > last - > type ! = HTML_TEXT_NODE ) & &
( cur - > last - > type ! = HTML_ENTITY_REF_NODE ) & &
( cur - > children ! = cur - > last ) & &
( cur - > name ! = NULL ) & &
( cur - > name [ 0 ] ! = ' p ' ) ) /* p, pre, param */
xmlOutputBufferWriteString ( buf , " \n " ) ;
/* End tag: optional prefix and name. */
xmlOutputBufferWriteString ( buf , " </ " ) ;
if ( ( cur - > ns ! = NULL ) & & ( cur - > ns - > prefix ! = NULL ) ) {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > ns - > prefix ) ;
xmlOutputBufferWriteString ( buf , " : " ) ;
}
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
xmlOutputBufferWriteString ( buf , " > " ) ;
/* Optional newline after the end tag, same rule as for leaf elements. */
if ( ( format ) & & ( info ! = NULL ) & & ( ! info - > isinline ) & &
( cur - > next ! = NULL ) ) {
if ( ( cur - > next - > type ! = HTML_TEXT_NODE ) & &
( cur - > next - > type ! = HTML_ENTITY_REF_NODE ) & &
2021-05-18 20:08:28 +02:00
( parent ! = NULL ) & &
( parent - > name ! = NULL ) & &
( parent - > name [ 0 ] ! = ' p ' ) ) /* p, pre, param */
2020-07-28 02:42:37 +02:00
xmlOutputBufferWriteString ( buf , " \n " ) ;
}
}
}
2001-02-23 17:55:21 +00:00
}
}
/**
2001-06-13 13:48:46 +00:00
* htmlNodeDumpOutput :
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the current node
2021-02-07 13:38:01 +01:00
* @ encoding : the encoding string ( unused )
2001-06-13 13:48:46 +00:00
*
* Dump an HTML node , recursive behaviour , children are printed too ,
* and formatting returns / spaces are added .
*/
void
htmlNodeDumpOutput ( xmlOutputBufferPtr buf , xmlDocPtr doc ,
2021-02-07 13:38:01 +01:00
xmlNodePtr cur , const char * encoding ATTRIBUTE_UNUSED ) {
htmlNodeDumpFormatOutput ( buf , doc , cur , NULL , 1 ) ;
2001-06-13 13:48:46 +00:00
}
/**
* htmlDocContentDumpFormatOutput :
2001-02-23 17:55:21 +00:00
* @ buf : the HTML buffer output
* @ cur : the document
2021-02-07 13:38:01 +01:00
* @ encoding : the encoding string ( unused )
2002-01-22 18:15:52 +00:00
* @ format : should formatting spaces been added
2001-02-23 17:55:21 +00:00
*
* Dump an HTML document .
*/
void
2001-06-13 13:48:46 +00:00
htmlDocContentDumpFormatOutput ( xmlOutputBufferPtr buf , xmlDocPtr cur ,
2021-02-07 13:38:01 +01:00
const char * encoding ATTRIBUTE_UNUSED ,
int format ) {
2022-05-14 08:48:01 -07:00
int type = 0 ;
if ( cur ) {
type = cur - > type ;
cur - > type = XML_HTML_DOCUMENT_NODE ;
}
2021-02-07 13:38:01 +01:00
htmlNodeDumpFormatOutput ( buf , cur , ( xmlNodePtr ) cur , NULL , format ) ;
2022-05-14 08:48:01 -07:00
if ( cur )
cur - > type = ( xmlElementType ) type ;
2001-02-23 17:55:21 +00:00
}
2001-06-13 13:48:46 +00:00
/**
* htmlDocContentDumpOutput :
* @ buf : the HTML buffer output
* @ cur : the document
2021-02-07 13:38:01 +01:00
* @ encoding : the encoding string ( unused )
2001-06-13 13:48:46 +00:00
*
2019-09-30 17:04:54 +02:00
* Dump an HTML document . Formatting return / spaces are added .
2001-06-13 13:48:46 +00:00
*/
void
htmlDocContentDumpOutput ( xmlOutputBufferPtr buf , xmlDocPtr cur ,
2021-02-07 13:38:01 +01:00
const char * encoding ATTRIBUTE_UNUSED ) {
htmlNodeDumpFormatOutput ( buf , cur , ( xmlNodePtr ) cur , NULL , 1 ) ;
2001-06-13 13:48:46 +00:00
}
2001-02-23 17:55:21 +00:00
/************************************************************************
* *
* Saving functions front - ends *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * htmlDocDump:
 * @f:  the FILE*
 * @cur:  the document
 *
 * Dump an HTML document to an open FILE. The output encoder is chosen
 * from the encoding declared in the document's meta tags, as reported
 * by htmlGetMetaEncoding().
 *
 * Returns the result of xmlOutputBufferClose() (the number of bytes
 * written), or -1 if an argument is NULL or the output buffer could
 * not be created.
 */
int
htmlDocDump(FILE *f, xmlDocPtr cur) {
    xmlCharEncodingHandlerPtr encoder;
    xmlOutputBufferPtr out;

    xmlInitParser();

    if ((f == NULL) || (cur == NULL))
        return(-1);

    encoder = htmlFindOutputEncoder((const char *) htmlGetMetaEncoding(cur));

    out = xmlOutputBufferCreateFile(f, encoder);
    if (out == NULL) {
        /* No buffer was created, so release the encoder here. */
        xmlCharEncCloseFunc(encoder);
        return(-1);
    }

    htmlDocContentDumpOutput(out, cur, NULL);

    return(xmlOutputBufferClose(out));
}
/**
 * htmlSaveFile:
 * @filename:  the filename (or URL)
 * @cur:  the document
 *
 * Dump an HTML document to a file. If @filename is "-" the stdout file is
 * used. The output encoder is chosen from the encoding declared in the
 * document's meta tags, and the document's compression setting is passed
 * to the output buffer.
 *
 * Returns the number of bytes written, or -1 in case of failure
 * (NULL argument or the output could not be opened).
 */
int
htmlSaveFile(const char *filename, xmlDocPtr cur) {
    xmlOutputBufferPtr buf;
    xmlCharEncodingHandlerPtr handler = NULL;
    const char *encoding;
    int ret;

    if ((cur == NULL) || (filename == NULL))
        return(-1);

    xmlInitParser();

    encoding = (const char *) htmlGetMetaEncoding(cur);
    handler = htmlFindOutputEncoder(encoding);

    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
    if (buf == NULL) {
        /*
         * Failing to open the output is an error: report -1 as documented
         * (and as htmlDocDump does) rather than 0, which callers testing
         * for a negative result would mistake for success.
         */
        xmlCharEncCloseFunc(handler);
        return(-1);
    }

    htmlDocContentDumpOutput(buf, cur, NULL);

    ret = xmlOutputBufferClose(buf);
    return(ret);
}
/**
2001-06-13 13:48:46 +00:00
* htmlSaveFileFormat :
2001-02-23 17:55:21 +00:00
* @ filename : the filename
* @ cur : the document
2001-06-13 13:48:46 +00:00
* @ format : should formatting spaces been added
* @ encoding : the document encoding
2001-02-23 17:55:21 +00:00
*
* Dump an HTML document to a file using a given encoding .
2012-09-11 13:26:36 +08:00
*
2001-02-23 17:55:21 +00:00
* returns : the number of byte written or - 1 in case of failure .
*/
int
2001-06-13 13:48:46 +00:00
htmlSaveFileFormat ( const char * filename , xmlDocPtr cur ,
const char * encoding , int format ) {
2001-02-23 17:55:21 +00:00
xmlOutputBufferPtr buf ;
xmlCharEncodingHandlerPtr handler = NULL ;
int ret ;
2004-11-02 14:52:23 +00:00
if ( ( cur = = NULL ) | | ( filename = = NULL ) )
return ( - 1 ) ;
2009-08-12 23:03:23 +02:00
2003-08-08 14:00:28 +00:00
xmlInitParser ( ) ;
2023-12-10 19:07:32 +01:00
handler = htmlFindOutputEncoder ( encoding ) ;
if ( handler ! = NULL )
htmlSetMetaEncoding ( cur , ( const xmlChar * ) handler - > name ) ;
else
2001-04-02 15:16:19 +00:00
htmlSetMetaEncoding ( cur , ( const xmlChar * ) " UTF-8 " ) ;
2001-02-23 17:55:21 +00:00
2012-09-11 13:26:36 +08:00
/*
2001-02-23 17:55:21 +00:00
* save the content to a temp buffer .
*/
buf = xmlOutputBufferCreateFilename ( filename , handler , 0 ) ;
2024-05-06 11:36:25 +02:00
if ( buf = = NULL ) {
xmlCharEncCloseFunc ( handler ) ;
return ( 0 ) ;
}
2001-02-23 17:55:21 +00:00
2001-06-13 13:48:46 +00:00
htmlDocContentDumpFormatOutput ( buf , cur , encoding , format ) ;
2001-02-23 17:55:21 +00:00
ret = xmlOutputBufferClose ( buf ) ;
return ( ret ) ;
}
2001-06-13 13:48:46 +00:00
/**
 * htmlSaveFileEnc:
 * @filename:  the filename
 * @cur:  the document
 * @encoding:  the document encoding
 *
 * Dump an HTML document to a file using a given encoding, with
 * formatting returns/spaces added. This is htmlSaveFileFormat()
 * with formatting always enabled.
 *
 * Returns the value returned by htmlSaveFileFormat().
 */
int
htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
    int written;

    /* Formatting is always on for this entry point. */
    written = htmlSaveFileFormat(filename, cur, encoding, 1);

    return(written);
}
2003-09-29 13:20:24 +00:00
# endif /* LIBXML_OUTPUT_ENABLED */
2002-08-12 13:27:28 +00:00
2001-02-23 17:55:21 +00:00
# endif /* LIBXML_HTML_ENABLED */