2001-02-23 17:55:21 +00:00
/*
2001-12-31 16:16:02 +00:00
* HTMLtree . c : implementation of access function for an HTML tree .
2001-02-23 17:55:21 +00:00
*
* See Copyright for the status of this software .
*
2001-06-24 12:13:24 +00:00
* daniel @ veillard . com
2001-02-23 17:55:21 +00:00
*/
2002-03-18 19:37:11 +00:00
# define IN_LIBXML
2001-04-21 16:57:29 +00:00
# include "libxml.h"
2001-02-23 17:55:21 +00:00
# ifdef LIBXML_HTML_ENABLED
2002-11-27 19:39:27 +00:00
# include <string.h> /* for memset() only ! */
2001-02-23 17:55:21 +00:00
# ifdef HAVE_CTYPE_H
# include <ctype.h>
# endif
# ifdef HAVE_STDLIB_H
# include <stdlib.h>
# endif
# include <libxml/xmlmemory.h>
# include <libxml/HTMLparser.h>
# include <libxml/HTMLtree.h>
# include <libxml/entities.h>
# include <libxml/valid.h>
# include <libxml/xmlerror.h>
# include <libxml/parserInternals.h>
2001-10-17 15:58:35 +00:00
# include <libxml/globals.h>
2002-04-14 22:00:22 +00:00
# include <libxml/uri.h>
2001-02-23 17:55:21 +00:00
/************************************************************************
 *									*
 *		Getting/Setting encoding meta tags			*
 *									*
 ************************************************************************/
/**
 * htmlGetMetaEncoding:
 * @doc:  the document
 *
 * Look up the encoding announced by a META element carrying both an
 * http-equiv="Content-Type" attribute and a content attribute.
 * The search walks the top level nodes for html/head/meta, then the
 * children of html for head/meta, and finally the list of candidate
 * meta elements.
 *
 * Returns a pointer inside the content attribute value, positioned just
 *         after the "charset=" marker (blanks and tabs skipped), or NULL
 *         if @doc is NULL, no such META is found, or it has no charset.
 */
const xmlChar *
htmlGetMetaEncoding(htmlDocPtr doc) {
    /* Markers are tried in this exact order; skip is the marker length. */
    static const struct {
        const char *text;
        int skip;
    } markers[] = {
        { "charset=", 8 },
        { "Charset=", 8 },
        { "CHARSET=", 8 },
        { "charset =", 9 },
        { "Charset =", 9 },
        { "CHARSET =", 9 }
    };
    htmlNodePtr node;
    const xmlChar *content = NULL;
    const xmlChar *charset = NULL;
    unsigned int idx;

    if (doc == NULL)
        return(NULL);

    /*
     * First pass: top level nodes, looking for html (or directly for
     * a head or meta element).
     */
    for (node = doc->children; node != NULL; node = node->next) {
        if ((node->type != XML_ELEMENT_NODE) || (node->name == NULL))
            continue;
        if (xmlStrEqual(node->name, BAD_CAST "html"))
            break;
        if (xmlStrEqual(node->name, BAD_CAST "head"))
            goto found_head;
        if (xmlStrEqual(node->name, BAD_CAST "meta"))
            goto found_meta;
    }
    if (node == NULL)
        return(NULL);

    /*
     * Second pass: children of html, looking for head (or a meta).
     */
    for (node = node->children; node != NULL; node = node->next) {
        if ((node->type != XML_ELEMENT_NODE) || (node->name == NULL))
            continue;
        if (xmlStrEqual(node->name, BAD_CAST "head"))
            break;
        if (xmlStrEqual(node->name, BAD_CAST "meta"))
            goto found_meta;
    }
    if (node == NULL)
        return(NULL);

found_head:
    node = node->children;

    /*
     * Third pass: scan the meta elements starting at node.
     */
found_meta:
    for (; node != NULL; node = node->next) {
        xmlAttrPtr attr;
        int http;

        if ((node->type != XML_ELEMENT_NODE) || (node->name == NULL))
            continue;
        if (!xmlStrEqual(node->name, BAD_CAST "meta"))
            continue;

        content = NULL;
        http = 0;
        for (attr = node->properties; attr != NULL; attr = attr->next) {
            const xmlChar *value;

            /* only attributes made of one single text node are considered */
            if ((attr->children == NULL) ||
                (attr->children->type != XML_TEXT_NODE) ||
                (attr->children->next != NULL))
                continue;

            value = attr->children->content;
            if ((!xmlStrcasecmp(attr->name, BAD_CAST "http-equiv")) &&
                (!xmlStrcasecmp(value, BAD_CAST "Content-Type"))) {
                http = 1;
            } else if ((value != NULL) &&
                       (!xmlStrcasecmp(attr->name, BAD_CAST "content"))) {
                content = value;
            }
            if ((http != 0) && (content != NULL))
                goto found_content;
        }
    }
    return(NULL);

found_content:
    for (idx = 0; idx < sizeof(markers) / sizeof(markers[0]); idx++) {
        charset = xmlStrstr(content, BAD_CAST markers[idx].text);
        if (charset != NULL) {
            charset += markers[idx].skip;
            break;
        }
    }
    if (charset != NULL) {
        while ((*charset == ' ') || (*charset == '\t'))
            charset++;
    }
    return(charset);
}
/**
 * htmlSetMetaEncoding:
 * @doc:  the document
 * @encoding:  the encoding string, or NULL to only remove the declaration
 *
 * Sets the current encoding in the Meta tags.
 * When @encoding is not NULL a new
 * META http-equiv="Content-Type" content="text/html; charset=@encoding"
 * element is inserted, then every other META element carrying both an
 * http-equiv="Content-Type" and a content attribute is removed and freed.
 * NOTE: this will not change the document content encoding, just
 *       the META flag associated.
 * The generated content value is built in a 100 byte buffer and is
 * truncated if @encoding is too long to fit.
 *
 * Returns 0 in case of success and -1 in case of error (NULL @doc, no
 *         html/head/meta element found, or failure to allocate the new node)
 */
int
htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
    htmlNodePtr cur, meta;
    const xmlChar *content;
    char newcontent[100];

    if (doc == NULL)
        return(-1);

    if (encoding != NULL) {
        snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
                 (const char *) encoding);
        newcontent[sizeof(newcontent) - 1] = 0;
    }

    cur = doc->children;

    /*
     * Search the html
     */
    while (cur != NULL) {
        if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
            if (xmlStrcasecmp(cur->name, BAD_CAST "html") == 0)
                break;
            if (xmlStrcasecmp(cur->name, BAD_CAST "head") == 0)
                goto found_head;
            if (xmlStrcasecmp(cur->name, BAD_CAST "meta") == 0)
                goto found_meta;
        }
        cur = cur->next;
    }
    if (cur == NULL)
        return(-1);
    cur = cur->children;

    /*
     * Search the head
     */
    while (cur != NULL) {
        if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
            if (xmlStrcasecmp(cur->name, BAD_CAST "head") == 0)
                break;
            if (xmlStrcasecmp(cur->name, BAD_CAST "meta") == 0)
                goto found_meta;
        }
        cur = cur->next;
    }
    if (cur == NULL)
        return(-1);

found_head:
    if (cur->children == NULL) {
        /* empty head: nothing to remove, at most one META to add */
        if (encoding == NULL)
            return(0);
        meta = xmlNewDocNode(doc, NULL, BAD_CAST "meta", NULL);
        if (meta == NULL)
            return(-1);
        xmlAddChild(cur, meta);
        xmlNewProp(meta, BAD_CAST "http-equiv", BAD_CAST "Content-Type");
        xmlNewProp(meta, BAD_CAST "content", BAD_CAST newcontent);
        return(0);
    }
    cur = cur->children;

found_meta:
    if (encoding != NULL) {
        /*
         * Create a new Meta element with the right attributes and put it
         * in front of cur, so the removal loop below never visits it.
         */
        meta = xmlNewDocNode(doc, NULL, BAD_CAST "meta", NULL);
        if (meta == NULL)
            return(-1);
        xmlAddPrevSibling(cur, meta);
        xmlNewProp(meta, BAD_CAST "http-equiv", BAD_CAST "Content-Type");
        xmlNewProp(meta, BAD_CAST "content", BAD_CAST newcontent);
    }

    /*
     * Search and destroy all the remaining meta elements carrying
     * encoding information
     */
    while (cur != NULL) {
        if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
            if (xmlStrcasecmp(cur->name, BAD_CAST "meta") == 0) {
                xmlAttrPtr attr = cur->properties;
                int http;
                const xmlChar *value;

                content = NULL;
                http = 0;
                while (attr != NULL) {
                    /* only single text node attribute values are examined */
                    if ((attr->children != NULL) &&
                        (attr->children->type == XML_TEXT_NODE) &&
                        (attr->children->next == NULL)) {
                        value = attr->children->content;
                        if ((!xmlStrcasecmp(attr->name, BAD_CAST "http-equiv"))
                         && (!xmlStrcasecmp(value, BAD_CAST "Content-Type")))
                            http = 1;
                        else {
                            if ((value != NULL) &&
                                (!xmlStrcasecmp(attr->name, BAD_CAST "content")))
                                content = value;
                        }
                        if ((http != 0) && (content != NULL))
                            break;
                    }
                    attr = attr->next;
                }
                if ((http != 0) && (content != NULL)) {
                    /* step to the next sibling before freeing this one */
                    meta = cur;
                    cur = cur->next;
                    xmlUnlinkNode(meta);
                    xmlFreeNode(meta);
                    continue;
                }
            }
        }
        cur = cur->next;
    }
    return(0);
}
2002-08-12 13:27:28 +00:00
/**
 * htmlBooleanAttrs:
 *
 * These are the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 * The table is terminated by a NULL entry and is never modified, so both
 * the strings and the pointers are declared const.
 */
static const char *const htmlBooleanAttrs[] = {
    "checked", "compact", "declare", "defer", "disabled", "ismap",
    "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
    "selected", NULL
};
/**
* htmlIsBooleanAttr :
* @ name : the name of the attribute to check
*
* Determine if a given attribute is a boolean attribute .
*
* returns : false if the attribute is not boolean , true otherwise .
*/
int
htmlIsBooleanAttr ( const xmlChar * name )
{
int i = 0 ;
while ( htmlBooleanAttrs [ i ] ! = NULL ) {
2002-09-26 12:40:03 +00:00
if ( xmlStrcasecmp ( ( const xmlChar * ) htmlBooleanAttrs [ i ] , name ) = = 0 )
2002-08-12 13:27:28 +00:00
return 1 ;
i + + ;
}
return 0 ;
}
2001-02-23 17:55:21 +00:00
/************************************************************************
 *									*
 *		Dumping HTML tree content to a simple buffer		*
 *									*
 ************************************************************************/
2002-11-27 19:39:27 +00:00
static int
2001-12-13 14:55:21 +00:00
htmlNodeDumpFormat ( xmlBufferPtr buf , xmlDocPtr doc , xmlNodePtr cur ,
int format ) ;
2001-02-23 17:55:21 +00:00
/**
2001-06-13 13:48:46 +00:00
* htmlNodeDumpFormat :
2001-02-23 17:55:21 +00:00
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the current node
2001-06-13 13:48:46 +00:00
* @ format : should formatting spaces been added
2001-02-23 17:55:21 +00:00
*
* Dump an HTML node , recursive behaviour , children are printed too .
2002-11-27 19:39:27 +00:00
*
* Returns the number of byte written or - 1 in case of error
2001-02-23 17:55:21 +00:00
*/
2002-11-27 19:39:27 +00:00
static int
2001-06-13 13:48:46 +00:00
htmlNodeDumpFormat ( xmlBufferPtr buf , xmlDocPtr doc , xmlNodePtr cur ,
int format ) {
2002-11-27 19:39:27 +00:00
unsigned int use ;
int ret ;
xmlOutputBufferPtr outbuf ;
2001-02-23 17:55:21 +00:00
if ( cur = = NULL ) {
2002-11-27 19:39:27 +00:00
return ( - 1 ) ;
}
if ( buf = = NULL ) {
return ( - 1 ) ;
}
outbuf = ( xmlOutputBufferPtr ) xmlMalloc ( sizeof ( xmlOutputBuffer ) ) ;
if ( outbuf = = NULL ) {
xmlGenericError ( xmlGenericErrorContext ,
" htmlNodeDumpFormat: out of memory! \n " ) ;
return ( - 1 ) ;
}
memset ( outbuf , 0 , ( size_t ) sizeof ( xmlOutputBuffer ) ) ;
outbuf - > buffer = buf ;
outbuf - > encoder = NULL ;
outbuf - > writecallback = NULL ;
outbuf - > closecallback = NULL ;
outbuf - > context = NULL ;
outbuf - > written = 0 ;
use = buf - > use ;
htmlNodeDumpFormatOutput ( outbuf , doc , cur , NULL , format ) ;
xmlFree ( outbuf ) ;
ret = buf - > use - use ;
return ( ret ) ;
2001-02-23 17:55:21 +00:00
}
/**
2001-06-13 13:48:46 +00:00
* htmlNodeDump :
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the current node
*
* Dump an HTML node , recursive behaviour , children are printed too ,
* and formatting returns are added .
2002-11-27 19:39:27 +00:00
*
* Returns the number of byte written or - 1 in case of error
2001-06-13 13:48:46 +00:00
*/
2002-11-27 19:39:27 +00:00
int
2001-06-13 13:48:46 +00:00
htmlNodeDump ( xmlBufferPtr buf , xmlDocPtr doc , xmlNodePtr cur ) {
2002-11-27 19:39:27 +00:00
return ( htmlNodeDumpFormat ( buf , doc , cur , 1 ) ) ;
2001-06-13 13:48:46 +00:00
}
/**
* htmlNodeDumpFileFormat :
2001-02-23 17:55:21 +00:00
* @ out : the FILE pointer
* @ doc : the document
* @ cur : the current node
2001-06-13 13:48:46 +00:00
* @ encoding : the document encoding
* @ format : should formatting spaces been added
2001-02-23 17:55:21 +00:00
*
* Dump an HTML node , recursive behaviour , children are printed too .
2001-06-13 13:48:46 +00:00
*
2001-06-14 11:11:59 +00:00
* TODO : if encoding = = NULL try to save in the doc encoding
*
* returns : the number of byte written or - 1 in case of failure .
2001-02-23 17:55:21 +00:00
*/
2001-06-14 11:11:59 +00:00
int
htmlNodeDumpFileFormat ( FILE * out , xmlDocPtr doc ,
xmlNodePtr cur , const char * encoding , int format ) {
xmlOutputBufferPtr buf ;
xmlCharEncodingHandlerPtr handler = NULL ;
int ret ;
if ( encoding ! = NULL ) {
xmlCharEncoding enc ;
enc = xmlParseCharEncoding ( encoding ) ;
if ( enc ! = XML_CHAR_ENCODING_UTF8 ) {
handler = xmlFindCharEncodingHandler ( encoding ) ;
if ( handler = = NULL )
return ( - 1 ) ;
}
}
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if ( handler = = NULL )
handler = xmlFindCharEncodingHandler ( " HTML " ) ;
if ( handler = = NULL )
handler = xmlFindCharEncodingHandler ( " ascii " ) ;
/*
* save the content to a temp buffer .
*/
buf = xmlOutputBufferCreateFile ( out , handler ) ;
if ( buf = = NULL ) return ( 0 ) ;
htmlNodeDumpFormatOutput ( buf , doc , cur , encoding , format ) ;
ret = xmlOutputBufferClose ( buf ) ;
return ( ret ) ;
2001-02-23 17:55:21 +00:00
}
2001-06-13 13:48:46 +00:00
/**
 * htmlNodeDumpFile:
 * @out:  the FILE pointer
 * @doc:  the document
 * @cur:  the current node
 *
 * Dump an HTML node to @out, recursive behaviour, children are printed
 * too. No encoding is requested and formatting is enabled; the result
 * of the underlying htmlNodeDumpFileFormat() call is discarded.
 */
void
htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
    const char *no_encoding = NULL;

    htmlNodeDumpFileFormat(out, doc, cur, no_encoding, 1);
}
2001-02-23 17:55:21 +00:00
/**
 * htmlDocDumpMemory:
 * @cur:  the document
 * @mem:  OUT: the memory pointer
 * @size:  OUT: the memory length
 *
 * Dump an HTML document in memory and return the xmlChar * and its size.
 * The output encoding is the one found by htmlGetMetaEncoding(); when it
 * differs from the document charset, conversion is only attempted if the
 * document charset is UTF-8.
 * It's up to the caller to free the memory.
 * On any failure *@mem is set to NULL and *@size to 0; nothing is done
 * at all if @mem or @size is NULL.
 */
void
htmlDocDumpMemory(xmlDocPtr cur, xmlChar **mem, int *size) {
    xmlOutputBufferPtr buf;
    xmlCharEncodingHandlerPtr handler = NULL;
    const char *encoding;

    /* without both output slots there is nowhere to report anything */
    if ((mem == NULL) || (size == NULL))
        return;

    /* failure values, overwritten only once the dump succeeded */
    *mem = NULL;
    *size = 0;

    if (cur == NULL) {
#ifdef DEBUG_TREE
        xmlGenericError(xmlGenericErrorContext,
                "htmlDocDumpMemory : document == NULL\n");
#endif
        return;
    }

    encoding = (const char *) htmlGetMetaEncoding(cur);

    if (encoding != NULL) {
        xmlCharEncoding enc;

        enc = xmlParseCharEncoding(encoding);
        if (enc != cur->charset) {
            if (cur->charset != XML_CHAR_ENCODING_UTF8) {
                /*
                 * Not supported yet
                 */
                return;
            }

            handler = xmlFindCharEncodingHandler(encoding);
            if (handler == NULL)
                return;
        }
    }

    /*
     * Fallback to HTML or ASCII when the encoding is unspecified
     */
    if (handler == NULL)
        handler = xmlFindCharEncodingHandler("HTML");
    if (handler == NULL)
        handler = xmlFindCharEncodingHandler("ascii");

    buf = xmlAllocOutputBuffer(handler);
    if (buf == NULL)
        return;

    htmlDocContentDumpOutput(buf, cur, NULL);
    xmlOutputBufferFlush(buf);

    /* copy from the converted buffer when there is one, else the raw one */
    if (buf->conv != NULL) {
        *size = buf->conv->use;
        *mem = xmlStrndup(buf->conv->content, *size);
    } else {
        *size = buf->buffer->use;
        *mem = xmlStrndup(buf->buffer->content, *size);
    }
    /* keep the two outputs coherent if the copy could not be made */
    if (*mem == NULL)
        *size = 0;
    (void) xmlOutputBufferClose(buf);
}
/************************************************************************
 *									*
 *		Dumping HTML tree content to an I/O output buffer	*
 *									*
 ************************************************************************/
2003-01-09 13:19:33 +00:00
/*
 * Prototype only: the definition is not in this file. It is called by
 * htmlNodeDumpFormatOutput() to dump the nsDef list of an element.
 * NOTE(review): presumably provided by the generic tree code - confirm.
 */
void xmlNsListDumpOutput ( xmlOutputBufferPtr buf , xmlNsPtr cur ) ;
2002-08-12 13:27:28 +00:00
2001-02-23 17:55:21 +00:00
/**
2001-06-13 07:45:41 +00:00
* htmlDtdDumpOutput :
2001-02-23 17:55:21 +00:00
* @ buf : the HTML buffer output
* @ doc : the document
* @ encoding : the encoding string
*
2001-03-24 17:00:36 +00:00
* TODO : check whether encoding is needed
*
2001-02-23 17:55:21 +00:00
* Dump the HTML document DTD , if any .
*/
static void
2001-03-24 17:00:36 +00:00
htmlDtdDumpOutput ( xmlOutputBufferPtr buf , xmlDocPtr doc ,
2001-03-26 16:28:29 +00:00
const char * encoding ATTRIBUTE_UNUSED ) {
2001-02-23 17:55:21 +00:00
xmlDtdPtr cur = doc - > intSubset ;
if ( cur = = NULL ) {
xmlGenericError ( xmlGenericErrorContext ,
2001-12-31 16:16:02 +00:00
" htmlDtdDumpOutput : no internal subset \n " ) ;
2001-02-23 17:55:21 +00:00
return ;
}
xmlOutputBufferWriteString ( buf , " <!DOCTYPE " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
if ( cur - > ExternalID ! = NULL ) {
xmlOutputBufferWriteString ( buf , " PUBLIC " ) ;
xmlBufferWriteQuotedString ( buf - > buffer , cur - > ExternalID ) ;
if ( cur - > SystemID ! = NULL ) {
xmlOutputBufferWriteString ( buf , " " ) ;
xmlBufferWriteQuotedString ( buf - > buffer , cur - > SystemID ) ;
}
} else if ( cur - > SystemID ! = NULL ) {
xmlOutputBufferWriteString ( buf , " SYSTEM " ) ;
xmlBufferWriteQuotedString ( buf - > buffer , cur - > SystemID ) ;
}
xmlOutputBufferWriteString ( buf , " > \n " ) ;
}
/**
2001-06-13 07:45:41 +00:00
* htmlAttrDumpOutput :
2001-02-23 17:55:21 +00:00
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the attribute pointer
* @ encoding : the encoding string
*
* Dump an HTML attribute
*/
static void
2001-03-24 17:00:36 +00:00
htmlAttrDumpOutput ( xmlOutputBufferPtr buf , xmlDocPtr doc , xmlAttrPtr cur ,
2001-03-26 16:28:29 +00:00
const char * encoding ATTRIBUTE_UNUSED ) {
2001-02-23 17:55:21 +00:00
xmlChar * value ;
2001-06-13 07:45:41 +00:00
/*
* TODO : The html output method should not escape a & character
* occurring in an attribute value immediately followed by
* a { character ( see Section B .7 .1 of the HTML 4.0 Recommendation ) .
*/
2001-02-23 17:55:21 +00:00
if ( cur = = NULL ) {
xmlGenericError ( xmlGenericErrorContext ,
2001-12-31 16:16:02 +00:00
" htmlAttrDumpOutput : property == NULL \n " ) ;
2001-02-23 17:55:21 +00:00
return ;
}
xmlOutputBufferWriteString ( buf , " " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
2002-08-12 13:27:28 +00:00
if ( ( cur - > children ! = NULL ) & & ( ! htmlIsBooleanAttr ( cur - > name ) ) ) {
2001-02-23 17:55:21 +00:00
value = xmlNodeListGetString ( doc , cur - > children , 0 ) ;
if ( value ) {
xmlOutputBufferWriteString ( buf , " = " ) ;
2002-08-12 14:53:41 +00:00
if ( ( ! xmlStrcasecmp ( cur - > name , BAD_CAST " href " ) ) | |
( ! xmlStrcasecmp ( cur - > name , BAD_CAST " src " ) ) ) {
2002-04-14 22:00:22 +00:00
xmlChar * escaped ;
xmlChar * tmp = value ;
while ( IS_BLANK ( * tmp ) ) tmp + + ;
2002-04-18 11:54:04 +00:00
escaped = xmlURIEscapeStr ( tmp , BAD_CAST " @/:=?;#%& " ) ;
2002-04-14 22:00:22 +00:00
if ( escaped ! = NULL ) {
xmlBufferWriteQuotedString ( buf - > buffer , escaped ) ;
xmlFree ( escaped ) ;
} else {
xmlBufferWriteQuotedString ( buf - > buffer , value ) ;
}
} else {
xmlBufferWriteQuotedString ( buf - > buffer , value ) ;
}
2001-02-23 17:55:21 +00:00
xmlFree ( value ) ;
} else {
xmlOutputBufferWriteString ( buf , " = \" \" " ) ;
}
}
}
/**
 * htmlAttrListDumpOutput:
 * @buf:  the HTML buffer output
 * @doc:  the document
 * @cur:  the first attribute pointer
 * @encoding:  the encoding string
 *
 * Dump a list of HTML attributes, following the next links from @cur.
 * An error is reported if @cur is NULL.
 */
static void
htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
    xmlAttrPtr attr;

    if (cur == NULL) {
        xmlGenericError(xmlGenericErrorContext,
                "htmlAttrListDumpOutput : property == NULL\n");
        return;
    }
    for (attr = cur; attr != NULL; attr = attr->next)
        htmlAttrDumpOutput(buf, doc, attr, encoding);
}
/**
2001-06-13 07:45:41 +00:00
* htmlNodeListDumpOutput :
2001-02-23 17:55:21 +00:00
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the first node
* @ encoding : the encoding string
2001-06-13 13:48:46 +00:00
* @ format : should formatting spaces been added
2001-02-23 17:55:21 +00:00
*
* Dump an HTML node list , recursive behaviour , children are printed too .
*/
static void
2001-06-13 13:48:46 +00:00
htmlNodeListDumpOutput ( xmlOutputBufferPtr buf , xmlDocPtr doc ,
xmlNodePtr cur , const char * encoding , int format ) {
2001-02-23 17:55:21 +00:00
if ( cur = = NULL ) {
xmlGenericError ( xmlGenericErrorContext ,
2001-12-31 16:16:02 +00:00
" htmlNodeListDumpOutput : node == NULL \n " ) ;
2001-02-23 17:55:21 +00:00
return ;
}
while ( cur ! = NULL ) {
2001-06-13 13:48:46 +00:00
htmlNodeDumpFormatOutput ( buf , doc , cur , encoding , format ) ;
2001-02-23 17:55:21 +00:00
cur = cur - > next ;
}
}
/**
2001-06-13 13:48:46 +00:00
* htmlNodeDumpFormatOutput :
2001-02-23 17:55:21 +00:00
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the current node
* @ encoding : the encoding string
2001-06-13 13:48:46 +00:00
* @ format : should formatting spaces been added
2001-02-23 17:55:21 +00:00
*
* Dump an HTML node , recursive behaviour , children are printed too .
*/
void
2001-06-13 13:48:46 +00:00
htmlNodeDumpFormatOutput ( xmlOutputBufferPtr buf , xmlDocPtr doc ,
xmlNodePtr cur , const char * encoding , int format ) {
2001-08-16 23:26:59 +00:00
const htmlElemDesc * info ;
2001-02-23 17:55:21 +00:00
if ( cur = = NULL ) {
xmlGenericError ( xmlGenericErrorContext ,
2001-12-31 16:16:02 +00:00
" htmlNodeDumpFormatOutput : node == NULL \n " ) ;
2001-02-23 17:55:21 +00:00
return ;
}
/*
* Special cases .
*/
if ( cur - > type = = XML_DTD_NODE )
return ;
if ( cur - > type = = XML_HTML_DOCUMENT_NODE ) {
htmlDocContentDumpOutput ( buf , ( xmlDocPtr ) cur , encoding ) ;
return ;
}
if ( cur - > type = = HTML_TEXT_NODE ) {
if ( cur - > content ! = NULL ) {
2001-10-11 22:55:55 +00:00
if ( ( ( cur - > name = = ( const xmlChar * ) xmlStringText ) | |
( cur - > name ! = ( const xmlChar * ) xmlStringTextNoenc ) ) & &
2001-06-05 20:57:42 +00:00
( ( cur - > parent = = NULL ) | |
2002-10-16 15:23:26 +00:00
( ( xmlStrcasecmp ( cur - > parent - > name , BAD_CAST " script " ) ) & &
( xmlStrcasecmp ( cur - > parent - > name , BAD_CAST " style " ) ) ) ) ) {
2001-02-23 17:55:21 +00:00
xmlChar * buffer ;
buffer = xmlEncodeEntitiesReentrant ( doc , cur - > content ) ;
if ( buffer ! = NULL ) {
xmlOutputBufferWriteString ( buf , ( const char * ) buffer ) ;
xmlFree ( buffer ) ;
}
} else {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > content ) ;
}
}
return ;
}
if ( cur - > type = = HTML_COMMENT_NODE ) {
if ( cur - > content ! = NULL ) {
xmlOutputBufferWriteString ( buf , " <!-- " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > content ) ;
xmlOutputBufferWriteString ( buf , " --> " ) ;
}
return ;
}
2001-04-24 15:52:00 +00:00
if ( cur - > type = = HTML_PI_NODE ) {
2001-04-25 10:29:44 +00:00
if ( cur - > name = = NULL )
return ;
xmlOutputBufferWriteString ( buf , " <? " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
2001-04-24 15:52:00 +00:00
if ( cur - > content ! = NULL ) {
2001-04-25 10:29:44 +00:00
xmlOutputBufferWriteString ( buf , " " ) ;
2001-04-24 15:52:00 +00:00
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > content ) ;
}
2001-04-25 10:29:44 +00:00
xmlOutputBufferWriteString ( buf , " > " ) ;
2001-04-24 15:52:00 +00:00
return ;
}
2001-02-23 17:55:21 +00:00
if ( cur - > type = = HTML_ENTITY_REF_NODE ) {
xmlOutputBufferWriteString ( buf , " & " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
xmlOutputBufferWriteString ( buf , " ; " ) ;
return ;
}
if ( cur - > type = = HTML_PRESERVE_NODE ) {
if ( cur - > content ! = NULL ) {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > content ) ;
}
return ;
}
/*
2001-12-31 16:16:02 +00:00
* Get specific HTML info for that node .
2001-02-23 17:55:21 +00:00
*/
2003-01-09 13:19:33 +00:00
if ( cur - > ns = = NULL )
info = htmlTagLookup ( cur - > name ) ;
else
info = NULL ;
2001-02-23 17:55:21 +00:00
xmlOutputBufferWriteString ( buf , " < " ) ;
2003-01-09 13:19:33 +00:00
if ( ( cur - > ns ! = NULL ) & & ( cur - > ns - > prefix ! = NULL ) ) {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > ns - > prefix ) ;
xmlOutputBufferWriteString ( buf , " : " ) ;
}
2001-02-23 17:55:21 +00:00
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
2003-01-09 13:19:33 +00:00
if ( cur - > nsDef )
xmlNsListDumpOutput ( buf , cur - > nsDef ) ;
2001-02-23 17:55:21 +00:00
if ( cur - > properties ! = NULL )
htmlAttrListDumpOutput ( buf , doc , cur - > properties , encoding ) ;
if ( ( info ! = NULL ) & & ( info - > empty ) ) {
xmlOutputBufferWriteString ( buf , " > " ) ;
2001-06-13 21:11:59 +00:00
if ( ( format ) & & ( ! info - > isinline ) & & ( cur - > next ! = NULL ) ) {
2001-02-23 17:55:21 +00:00
if ( ( cur - > next - > type ! = HTML_TEXT_NODE ) & &
2001-06-07 11:20:20 +00:00
( cur - > next - > type ! = HTML_ENTITY_REF_NODE ) & &
( cur - > parent ! = NULL ) & &
( ! xmlStrEqual ( cur - > parent - > name , BAD_CAST " pre " ) ) )
2001-02-23 17:55:21 +00:00
xmlOutputBufferWriteString ( buf , " \n " ) ;
}
return ;
}
2001-07-12 01:20:08 +00:00
if ( ( ( cur - > type = = XML_ELEMENT_NODE ) | | ( cur - > content = = NULL ) ) & &
( cur - > children = = NULL ) ) {
2001-02-23 17:55:21 +00:00
if ( ( info ! = NULL ) & & ( info - > saveEndTag ! = 0 ) & &
2001-06-13 07:45:41 +00:00
( xmlStrcmp ( BAD_CAST info - > name , BAD_CAST " html " ) ) & &
( xmlStrcmp ( BAD_CAST info - > name , BAD_CAST " body " ) ) ) {
2001-02-23 17:55:21 +00:00
xmlOutputBufferWriteString ( buf , " > " ) ;
} else {
xmlOutputBufferWriteString ( buf , " ></ " ) ;
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
xmlOutputBufferWriteString ( buf , " > " ) ;
}
2001-06-13 21:11:59 +00:00
if ( ( format ) & & ( cur - > next ! = NULL ) & &
( info ! = NULL ) & & ( ! info - > isinline ) ) {
2001-02-23 17:55:21 +00:00
if ( ( cur - > next - > type ! = HTML_TEXT_NODE ) & &
2001-06-07 11:20:20 +00:00
( cur - > next - > type ! = HTML_ENTITY_REF_NODE ) & &
( cur - > parent ! = NULL ) & &
( ! xmlStrEqual ( cur - > parent - > name , BAD_CAST " pre " ) ) )
2001-02-23 17:55:21 +00:00
xmlOutputBufferWriteString ( buf , " \n " ) ;
}
return ;
}
xmlOutputBufferWriteString ( buf , " > " ) ;
2001-07-12 01:20:08 +00:00
if ( ( cur - > type ! = XML_ELEMENT_NODE ) & &
( cur - > content ! = NULL ) ) {
2001-02-23 17:55:21 +00:00
/*
* Uses the OutputBuffer property to automatically convert
* invalids to charrefs
*/
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > content ) ;
}
if ( cur - > children ! = NULL ) {
2001-06-13 21:11:59 +00:00
if ( ( format ) & & ( info ! = NULL ) & & ( ! info - > isinline ) & &
( cur - > children - > type ! = HTML_TEXT_NODE ) & &
2001-02-23 17:55:21 +00:00
( cur - > children - > type ! = HTML_ENTITY_REF_NODE ) & &
2001-06-07 16:07:07 +00:00
( cur - > children ! = cur - > last ) & &
( ! xmlStrEqual ( cur - > name , BAD_CAST " pre " ) ) )
2001-02-23 17:55:21 +00:00
xmlOutputBufferWriteString ( buf , " \n " ) ;
2001-06-13 13:48:46 +00:00
htmlNodeListDumpOutput ( buf , doc , cur - > children , encoding , format ) ;
2001-06-13 21:11:59 +00:00
if ( ( format ) & & ( info ! = NULL ) & & ( ! info - > isinline ) & &
( cur - > last - > type ! = HTML_TEXT_NODE ) & &
2001-02-23 17:55:21 +00:00
( cur - > last - > type ! = HTML_ENTITY_REF_NODE ) & &
2001-06-07 16:07:07 +00:00
( cur - > children ! = cur - > last ) & &
( ! xmlStrEqual ( cur - > name , BAD_CAST " pre " ) ) )
2001-02-23 17:55:21 +00:00
xmlOutputBufferWriteString ( buf , " \n " ) ;
}
xmlOutputBufferWriteString ( buf , " </ " ) ;
2003-01-09 13:19:33 +00:00
if ( ( cur - > ns ! = NULL ) & & ( cur - > ns - > prefix ! = NULL ) ) {
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > ns - > prefix ) ;
xmlOutputBufferWriteString ( buf , " : " ) ;
}
2001-02-23 17:55:21 +00:00
xmlOutputBufferWriteString ( buf , ( const char * ) cur - > name ) ;
xmlOutputBufferWriteString ( buf , " > " ) ;
2001-06-13 21:11:59 +00:00
if ( ( format ) & & ( info ! = NULL ) & & ( ! info - > isinline ) & &
( cur - > next ! = NULL ) ) {
2001-02-23 17:55:21 +00:00
if ( ( cur - > next - > type ! = HTML_TEXT_NODE ) & &
2001-06-07 16:07:07 +00:00
( cur - > next - > type ! = HTML_ENTITY_REF_NODE ) & &
( cur - > parent ! = NULL ) & &
( ! xmlStrEqual ( cur - > parent - > name , BAD_CAST " pre " ) ) )
2001-02-23 17:55:21 +00:00
xmlOutputBufferWriteString ( buf , " \n " ) ;
}
}
/**
 * htmlNodeDumpOutput:
 * @buf:  the HTML buffer output
 * @doc:  the document
 * @cur:  the current node
 * @encoding:  the encoding string
 *
 * Dump an HTML node with formatting enabled; recursive behaviour,
 * children are printed too.
 */
void
htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
                   xmlNodePtr cur, const char *encoding) {
    const int with_format = 1;

    htmlNodeDumpFormatOutput(buf, doc, cur, encoding, with_format);
}
/**
* htmlDocContentDumpFormatOutput :
2001-02-23 17:55:21 +00:00
* @ buf : the HTML buffer output
* @ cur : the document
* @ encoding : the encoding string
2002-01-22 18:15:52 +00:00
* @ format : should formatting spaces been added
2001-02-23 17:55:21 +00:00
*
* Dump an HTML document .
*/
void
2001-06-13 13:48:46 +00:00
htmlDocContentDumpFormatOutput ( xmlOutputBufferPtr buf , xmlDocPtr cur ,
const char * encoding , int format ) {
2001-02-23 17:55:21 +00:00
int type ;
/*
* force to output the stuff as HTML , especially for entities
*/
type = cur - > type ;
cur - > type = XML_HTML_DOCUMENT_NODE ;
2001-04-02 15:16:19 +00:00
if ( cur - > intSubset ! = NULL ) {
2001-02-23 17:55:21 +00:00
htmlDtdDumpOutput ( buf , cur , NULL ) ;
}
if ( cur - > children ! = NULL ) {
2001-06-13 13:48:46 +00:00
htmlNodeListDumpOutput ( buf , cur , cur - > children , encoding , format ) ;
2001-02-23 17:55:21 +00:00
}
xmlOutputBufferWriteString ( buf , " \n " ) ;
cur - > type = ( xmlElementType ) type ;
}
2001-06-13 13:48:46 +00:00
/**
 * htmlDocContentDumpOutput:
 * @buf:  the HTML buffer output
 * @cur:  the document
 * @encoding:  the encoding string
 *
 * Dump an HTML document with formatting enabled, i.e. formatting
 * returns/spaces are added.
 */
void
htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
                         const char *encoding) {
    const int with_format = 1;

    htmlDocContentDumpFormatOutput(buf, cur, encoding, with_format);
}
2001-02-23 17:55:21 +00:00
/************************************************************************
 *									*
 *		Saving functions front-ends				*
 *									*
 ************************************************************************/
/**
 * htmlDocDump:
 * @f:  the FILE*
 * @cur:  the document
 *
 * Dump an HTML document to an open FILE. The output encoding is the one
 * found by htmlGetMetaEncoding(); when it differs from the document
 * charset, conversion is only attempted if the document charset is UTF-8.
 *
 * returns: the number of byte written or -1 in case of failure.
 */
int
htmlDocDump(FILE *f, xmlDocPtr cur) {
    xmlCharEncodingHandlerPtr handler = NULL;
    xmlOutputBufferPtr out;
    const char *meta_encoding;

    if (cur == NULL) {
#ifdef DEBUG_TREE
        xmlGenericError(xmlGenericErrorContext,
                "htmlDocDump : document == NULL\n");
#endif
        return(-1);
    }

    meta_encoding = (const char *) htmlGetMetaEncoding(cur);
    if (meta_encoding != NULL) {
        xmlCharEncoding enc = xmlParseCharEncoding(meta_encoding);

        if (enc != cur->charset) {
            /*
             * Converting from anything but UTF-8 is not supported yet
             */
            if (cur->charset != XML_CHAR_ENCODING_UTF8)
                return(-1);

            handler = xmlFindCharEncodingHandler(meta_encoding);
            if (handler == NULL)
                return(-1);
        }
    }

    /*
     * Fallback to HTML or ASCII when the encoding is unspecified
     */
    if (handler == NULL) {
        handler = xmlFindCharEncodingHandler("HTML");
        if (handler == NULL)
            handler = xmlFindCharEncodingHandler("ascii");
    }

    out = xmlOutputBufferCreateFile(f, handler);
    if (out == NULL)
        return(-1);
    htmlDocContentDumpOutput(out, cur, NULL);

    return(xmlOutputBufferClose(out));
}
/**
 * htmlSaveFile:
 * @filename:  the filename (or URL)
 * @cur:  the document
 *
 * Dump an HTML document to a file. If @filename is "-" the stdout file is
 * used. The output encoding is the one found by htmlGetMetaEncoding();
 * when it differs from the document charset, conversion is only attempted
 * if the document charset is UTF-8. The document compression level is
 * passed on when the output is created.
 *
 * returns: the number of byte written or -1 in case of failure
 *          (NULL argument, unsupported encoding, output creation failure).
 */
int
htmlSaveFile(const char *filename, xmlDocPtr cur) {
    xmlOutputBufferPtr buf;
    xmlCharEncodingHandlerPtr handler = NULL;
    const char *encoding;
    int ret;

    /* cur is dereferenced below, refuse NULL arguments up front */
    if ((cur == NULL) || (filename == NULL))
        return(-1);

    encoding = (const char *) htmlGetMetaEncoding(cur);

    if (encoding != NULL) {
        xmlCharEncoding enc;

        enc = xmlParseCharEncoding(encoding);
        if (enc != cur->charset) {
            if (cur->charset != XML_CHAR_ENCODING_UTF8) {
                /*
                 * Not supported yet
                 */
                return(-1);
            }

            handler = xmlFindCharEncodingHandler(encoding);
            if (handler == NULL)
                return(-1);
        }
    }

    /*
     * Fallback to HTML or ASCII when the encoding is unspecified
     */
    if (handler == NULL)
        handler = xmlFindCharEncodingHandler("HTML");
    if (handler == NULL)
        handler = xmlFindCharEncodingHandler("ascii");

    /*
     * write the content through an output buffer bound to the target;
     * a creation failure is an error, not a zero byte write.
     */
    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
    if (buf == NULL)
        return(-1);

    htmlDocContentDumpOutput(buf, cur, NULL);

    ret = xmlOutputBufferClose(buf);
    return(ret);
}
/**
2001-06-13 13:48:46 +00:00
* htmlSaveFileFormat :
2001-02-23 17:55:21 +00:00
* @ filename : the filename
* @ cur : the document
2001-06-13 13:48:46 +00:00
* @ format : should formatting spaces been added
* @ encoding : the document encoding
2001-02-23 17:55:21 +00:00
*
* Dump an HTML document to a file using a given encoding .
*
* returns : the number of byte written or - 1 in case of failure .
*/
int
2001-06-13 13:48:46 +00:00
htmlSaveFileFormat ( const char * filename , xmlDocPtr cur ,
const char * encoding , int format ) {
2001-02-23 17:55:21 +00:00
xmlOutputBufferPtr buf ;
xmlCharEncodingHandlerPtr handler = NULL ;
int ret ;
if ( encoding ! = NULL ) {
xmlCharEncoding enc ;
enc = xmlParseCharEncoding ( encoding ) ;
if ( enc ! = cur - > charset ) {
if ( cur - > charset ! = XML_CHAR_ENCODING_UTF8 ) {
/*
* Not supported yet
*/
return ( - 1 ) ;
}
handler = xmlFindCharEncodingHandler ( encoding ) ;
if ( handler = = NULL )
return ( - 1 ) ;
htmlSetMetaEncoding ( cur , ( const xmlChar * ) encoding ) ;
}
2001-04-02 15:16:19 +00:00
} else {
htmlSetMetaEncoding ( cur , ( const xmlChar * ) " UTF-8 " ) ;
2001-02-23 17:55:21 +00:00
}
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if ( handler = = NULL )
handler = xmlFindCharEncodingHandler ( " HTML " ) ;
if ( handler = = NULL )
handler = xmlFindCharEncodingHandler ( " ascii " ) ;
/*
* save the content to a temp buffer .
*/
buf = xmlOutputBufferCreateFilename ( filename , handler , 0 ) ;
if ( buf = = NULL ) return ( 0 ) ;
2001-06-13 13:48:46 +00:00
htmlDocContentDumpFormatOutput ( buf , cur , encoding , format ) ;
2001-02-23 17:55:21 +00:00
ret = xmlOutputBufferClose ( buf ) ;
return ( ret ) ;
}
2001-06-13 13:48:46 +00:00
/**
 * htmlSaveFileEnc:
 * @filename:  the filename
 * @cur:  the document
 * @encoding: the document encoding
 *
 * Dump an HTML document to a file using a given encoding, with
 * formatting enabled (formatting returns/spaces are added).
 *
 * returns: the number of byte written or -1 in case of failure.
 */
int
htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
    int written;

    written = htmlSaveFileFormat(filename, cur, encoding, 1);
    return(written);
}
2002-08-12 13:27:28 +00:00
2001-02-23 17:55:21 +00:00
# endif /* LIBXML_HTML_ENABLED */