1999-07-07 04:19:20 +00:00
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
1999-09-22 09:46:25 +00:00
1999-12-22 11:30:41 +00:00
# ifdef WIN32
# include "win32config.h"
# else
1999-07-07 04:19:20 +00:00
# include "config.h"
1999-09-22 09:46:25 +00:00
# endif
2000-04-03 19:48:13 +00:00
# include "xmlversion.h"
# ifdef LIBXML_HTML_ENABLED
1999-07-07 04:19:20 +00:00
# include <stdio.h>
1999-09-22 09:46:25 +00:00
# include <string.h> /* for memset() only ! */
# ifdef HAVE_CTYPE_H
1999-07-07 04:19:20 +00:00
# include <ctype.h>
1999-09-22 09:46:25 +00:00
# endif
# ifdef HAVE_STDLIB_H
1999-07-07 04:19:20 +00:00
# include <stdlib.h>
1999-09-22 09:46:25 +00:00
# endif
1999-07-07 04:19:20 +00:00
2000-04-03 19:48:13 +00:00
# include <libxml/xmlmemory.h>
# include <libxml/HTMLparser.h>
# include <libxml/HTMLtree.h>
# include <libxml/entities.h>
# include <libxml/valid.h>
1999-07-07 04:19:20 +00:00
1999-12-28 16:35:14 +00:00
/* Forward declaration: htmlNodeDump() calls this for document nodes, but the
 * definition only appears further down in this file. */
static void htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
1999-07-07 04:19:20 +00:00
/**
 * htmlDtdDump:
 * @buf:  the HTML buffer output
 * @doc:  the document
 *
 * Dump the HTML document DTD, if any, as a <!DOCTYPE ...> declaration
 * followed by a newline.
 *
 * The declaration carries " PUBLIC " plus the quoted external ID (and the
 * quoted system ID when there is one), or " SYSTEM " plus the quoted system
 * ID when only a system ID is set.  A document without an internal subset
 * is reported on stderr and nothing is written to @buf.
 */
static void
htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
    xmlDtdPtr cur = doc->intSubset;

    if (cur == NULL) {
        fprintf(stderr, "htmlDtdDump : no internal subset\n");
        return;
    }

    /* The trailing blank separates the keyword from the DTD name. */
    xmlBufferWriteChar(buf, "<!DOCTYPE ");
    xmlBufferWriteCHAR(buf, cur->name);

    if (cur->ExternalID != NULL) {
        xmlBufferWriteChar(buf, " PUBLIC ");
        xmlBufferWriteQuotedString(buf, cur->ExternalID);
        if (cur->SystemID != NULL) {
            /* A single blank separates the two quoted identifiers. */
            xmlBufferWriteChar(buf, " ");
            xmlBufferWriteQuotedString(buf, cur->SystemID);
        }
    } else if (cur->SystemID != NULL) {
        xmlBufferWriteChar(buf, " SYSTEM ");
        xmlBufferWriteQuotedString(buf, cur->SystemID);
    }

    xmlBufferWriteChar(buf, ">\n");
}
/**
 * htmlAttrDump:
 * @buf:  the HTML buffer output
 * @doc:  the document
 * @cur:  the attribute pointer
 *
 * Dump an HTML attribute as a blank followed by its name and, when the
 * attribute has children, `=` and the quoted value.
 *
 * An attribute without children is written as a bare name.  When the
 * children yield no string, an empty quoted value (`=""`) is written.
 * A NULL @cur is reported on stderr and nothing is written.
 */
static void
htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
    xmlChar *value;

    if (cur == NULL) {
        fprintf(stderr, "htmlAttrDump : property == NULL\n");
        return;
    }

    /* The leading blank separates this attribute from what precedes it. */
    xmlBufferWriteChar(buf, " ");
    xmlBufferWriteCHAR(buf, cur->name);

    if (cur->children != NULL) {
        value = xmlNodeListGetString(doc, cur->children, 0);
        if (value) {
            xmlBufferWriteChar(buf, "=");
            xmlBufferWriteQuotedString(buf, value);
            /* The string returned above is released here once written. */
            xmlFree(value);
        } else {
            xmlBufferWriteChar(buf, "=\"\"");
        }
    }
}
/**
 * htmlAttrListDump:
 * @buf:  the HTML buffer output
 * @doc:  the document
 * @cur:  the first attribute pointer
 *
 * Walk an attribute chain through its `next` links and hand each attribute,
 * in order, to htmlAttrDump().  A NULL @cur is reported on stderr and
 * nothing is written.
 */
static void
htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
    xmlAttrPtr attr;

    if (cur == NULL) {
        /* NOTE(review): literal reproduced verbatim from the text under
         * review; the blanks around it look like an extraction artifact -
         * confirm against the upstream file. */
        fprintf(stderr, " htmlAttrListDump : property == NULL \n ");
        return;
    }

    for (attr = cur; attr != NULL; attr = attr->next)
        htmlAttrDump(buf, doc, attr);
}
1999-12-28 16:35:14 +00:00
void
1999-07-07 07:32:15 +00:00
htmlNodeDump ( xmlBufferPtr buf , xmlDocPtr doc , xmlNodePtr cur ) ;
1999-07-07 04:19:20 +00:00
/**
* htmlNodeListDump :
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the first node
*
* Dump an HTML node list , recursive behaviour , children are printed too .
*/
static void
1999-07-07 07:32:15 +00:00
htmlNodeListDump ( xmlBufferPtr buf , xmlDocPtr doc , xmlNodePtr cur ) {
1999-07-07 04:19:20 +00:00
if ( cur = = NULL ) {
fprintf ( stderr , " htmlNodeListDump : node == NULL \n " ) ;
return ;
}
while ( cur ! = NULL ) {
1999-07-07 07:32:15 +00:00
htmlNodeDump ( buf , doc , cur ) ;
1999-07-07 04:19:20 +00:00
cur = cur - > next ;
}
}
/**
* htmlNodeDump :
* @ buf : the HTML buffer output
* @ doc : the document
* @ cur : the current node
*
* Dump an HTML node , recursive behaviour , children are printed too .
*/
1999-12-28 16:35:14 +00:00
void
1999-07-07 07:32:15 +00:00
htmlNodeDump ( xmlBufferPtr buf , xmlDocPtr doc , xmlNodePtr cur ) {
htmlElemDescPtr info ;
1999-07-07 04:19:20 +00:00
if ( cur = = NULL ) {
fprintf ( stderr , " htmlNodeDump : node == NULL \n " ) ;
return ;
}
1999-07-07 07:32:15 +00:00
/*
* Special cases .
*/
1999-12-28 16:35:14 +00:00
if ( cur - > type = = XML_HTML_DOCUMENT_NODE ) {
htmlDocContentDump ( buf , ( xmlDocPtr ) cur ) ;
return ;
}
1999-07-07 04:19:20 +00:00
if ( cur - > type = = HTML_TEXT_NODE ) {
if ( cur - > content ! = NULL ) {
1999-09-23 22:19:22 +00:00
xmlChar * buffer ;
1999-07-07 04:19:20 +00:00
1999-07-07 07:32:15 +00:00
/* uses the HTML encoding routine !!!!!!!!!! */
1999-12-01 09:51:45 +00:00
# ifndef XML_USE_BUFFER_CONTENT
1999-07-07 04:19:20 +00:00
buffer = xmlEncodeEntitiesReentrant ( doc , cur - > content ) ;
1999-12-01 09:51:45 +00:00
# else
buffer = xmlEncodeEntitiesReentrant ( doc ,
xmlBufferContent ( cur - > content ) ) ;
# endif
1999-07-07 04:19:20 +00:00
if ( buffer ! = NULL ) {
xmlBufferWriteCHAR ( buf , buffer ) ;
1999-09-02 22:04:43 +00:00
xmlFree ( buffer ) ;
1999-07-07 04:19:20 +00:00
}
}
return ;
}
if ( cur - > type = = HTML_COMMENT_NODE ) {
if ( cur - > content ! = NULL ) {
xmlBufferWriteChar ( buf , " <!-- " ) ;
1999-12-01 09:51:45 +00:00
# ifndef XML_USE_BUFFER_CONTENT
1999-07-07 04:19:20 +00:00
xmlBufferWriteCHAR ( buf , cur - > content ) ;
1999-12-01 09:51:45 +00:00
# else
xmlBufferWriteCHAR ( buf , xmlBufferContent ( cur - > content ) ) ;
# endif
1999-07-07 04:19:20 +00:00
xmlBufferWriteChar ( buf , " --> " ) ;
}
return ;
}
if ( cur - > type = = HTML_ENTITY_REF_NODE ) {
xmlBufferWriteChar ( buf , " & " ) ;
xmlBufferWriteCHAR ( buf , cur - > name ) ;
xmlBufferWriteChar ( buf , " ; " ) ;
return ;
}
1999-07-07 07:32:15 +00:00
/*
* Get specific HTmL info for taht node .
*/
info = htmlTagLookup ( cur - > name ) ;
1999-07-07 04:19:20 +00:00
1999-07-07 07:32:15 +00:00
xmlBufferWriteChar ( buf , " < " ) ;
1999-07-07 04:19:20 +00:00
xmlBufferWriteCHAR ( buf , cur - > name ) ;
if ( cur - > properties ! = NULL )
htmlAttrListDump ( buf , doc , cur - > properties ) ;
1999-10-14 09:10:25 +00:00
if ( ( info ! = NULL ) & & ( info - > empty ) ) {
1999-07-07 07:32:15 +00:00
xmlBufferWriteChar ( buf , " > " ) ;
if ( cur - > next ! = NULL ) {
if ( ( cur - > next - > type ! = HTML_TEXT_NODE ) & &
( cur - > next - > type ! = HTML_ENTITY_REF_NODE ) )
xmlBufferWriteChar ( buf , " \n " ) ;
}
return ;
}
2000-03-14 18:30:20 +00:00
if ( ( cur - > content = = NULL ) & & ( cur - > children = = NULL ) ) {
1999-10-14 09:10:25 +00:00
if ( ( info ! = NULL ) & & ( info - > endTag ! = 0 ) )
1999-07-07 07:32:15 +00:00
xmlBufferWriteChar ( buf , " > " ) ;
else {
xmlBufferWriteChar ( buf , " ></ " ) ;
xmlBufferWriteCHAR ( buf , cur - > name ) ;
xmlBufferWriteChar ( buf , " > " ) ;
}
if ( cur - > next ! = NULL ) {
if ( ( cur - > next - > type ! = HTML_TEXT_NODE ) & &
( cur - > next - > type ! = HTML_ENTITY_REF_NODE ) )
xmlBufferWriteChar ( buf , " \n " ) ;
}
1999-07-07 04:19:20 +00:00
return ;
}
xmlBufferWriteChar ( buf , " > " ) ;
if ( cur - > content ! = NULL ) {
1999-09-23 22:19:22 +00:00
xmlChar * buffer ;
1999-07-07 04:19:20 +00:00
1999-12-01 09:51:45 +00:00
# ifndef XML_USE_BUFFER_CONTENT
buffer = xmlEncodeEntitiesReentrant ( doc , cur - > content ) ;
# else
buffer = xmlEncodeEntitiesReentrant ( doc ,
xmlBufferContent ( cur - > content ) ) ;
# endif
1999-07-07 04:19:20 +00:00
if ( buffer ! = NULL ) {
xmlBufferWriteCHAR ( buf , buffer ) ;
1999-09-02 22:04:43 +00:00
xmlFree ( buffer ) ;
1999-07-07 04:19:20 +00:00
}
}
2000-03-14 18:30:20 +00:00
if ( cur - > children ! = NULL ) {
if ( ( cur - > children - > type ! = HTML_TEXT_NODE ) & &
( cur - > children - > type ! = HTML_ENTITY_REF_NODE ) & &
( cur - > children ! = cur - > last ) )
1999-07-07 07:32:15 +00:00
xmlBufferWriteChar ( buf , " \n " ) ;
2000-03-14 18:30:20 +00:00
htmlNodeListDump ( buf , doc , cur - > children ) ;
1999-07-07 07:32:15 +00:00
if ( ( cur - > last - > type ! = HTML_TEXT_NODE ) & &
1999-12-01 09:51:45 +00:00
( cur - > last - > type ! = HTML_ENTITY_REF_NODE ) & &
2000-03-14 18:30:20 +00:00
( cur - > children ! = cur - > last ) )
1999-07-07 07:32:15 +00:00
xmlBufferWriteChar ( buf , " \n " ) ;
1999-07-07 04:19:20 +00:00
}
1999-12-21 15:35:29 +00:00
if ( ! htmlIsAutoClosed ( doc , cur ) ) {
xmlBufferWriteChar ( buf , " </ " ) ;
xmlBufferWriteCHAR ( buf , cur - > name ) ;
xmlBufferWriteChar ( buf , " > " ) ;
}
1999-07-07 07:32:15 +00:00
if ( cur - > next ! = NULL ) {
if ( ( cur - > next - > type ! = HTML_TEXT_NODE ) & &
( cur - > next - > type ! = HTML_ENTITY_REF_NODE ) )
xmlBufferWriteChar ( buf , " \n " ) ;
}
1999-07-07 04:19:20 +00:00
}
2000-02-02 17:15:36 +00:00
/**
 * htmlNodeDumpFile:
 * @out:  the FILE pointer
 * @doc:  the document
 * @cur:  the current node
 *
 * Serialize an HTML node (and, recursively, its children) into a temporary
 * buffer with htmlNodeDump(), pass that buffer to xmlBufferDump() for @out
 * and free it.  If the temporary buffer cannot be created the function
 * returns without writing anything.
 */
void
htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
    xmlBufferPtr scratch = xmlBufferCreate();

    if (scratch != NULL) {
        htmlNodeDump(scratch, doc, cur);
        xmlBufferDump(out, scratch);
        xmlBufferFree(scratch);
    }
}
1999-07-07 04:19:20 +00:00
/**
 * htmlDocContentDump:
 * @buf:  the HTML buffer output
 * @cur:  the document
 *
 * Dump an HTML document: the DOCTYPE, then every top-level node, then a
 * final newline.
 *
 * When the document has no internal subset, an HTML 4.0 Transitional
 * DOCTYPE is written instead.  The document's type field is temporarily set
 * to XML_HTML_DOCUMENT_NODE for the duration of the dump and restored
 * before returning.
 */
static void
htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
    int type;

    /*
     * Force the output to be produced as HTML, especially for entities.
     */
    type = cur->type;
    cur->type = XML_HTML_DOCUMENT_NODE;

    if (cur->intSubset != NULL) {
        htmlDtdDump(buf, cur);
    } else {
        /* Default to HTML-4.0 transitional @@@@ */
        xmlBufferWriteChar(buf,
            "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\""
            " \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
    }

    if (cur->children != NULL)
        htmlNodeListDump(buf, cur, cur->children);
    xmlBufferWriteChar(buf, "\n");

    /* Put back whatever type the document had on entry. */
    cur->type = type;
}
/**
* htmlDocDumpMemory :
* @ cur : the document
* @ mem : OUT : the memory pointer
* @ size : OUT : the memory lenght
*
1999-09-23 22:19:22 +00:00
* Dump an HTML document in memory and return the xmlChar * and it ' s size .
1999-07-07 04:19:20 +00:00
* It ' s up to the caller to free the memory .
*/
void
1999-09-23 22:19:22 +00:00
htmlDocDumpMemory ( xmlDocPtr cur , xmlChar * * mem , int * size ) {
1999-07-07 04:19:20 +00:00
xmlBufferPtr buf ;
if ( cur = = NULL ) {
# ifdef DEBUG_TREE
1999-12-29 12:49:06 +00:00
fprintf ( stderr , " htmlxmlDocDumpMemory : document == NULL \n " ) ;
1999-07-07 04:19:20 +00:00
# endif
* mem = NULL ;
* size = 0 ;
return ;
}
buf = xmlBufferCreate ( ) ;
if ( buf = = NULL ) {
* mem = NULL ;
* size = 0 ;
return ;
}
htmlDocContentDump ( buf , cur ) ;
* mem = buf - > content ;
* size = buf - > use ;
memset ( buf , - 1 , sizeof ( xmlBuffer ) ) ;
1999-09-02 22:04:43 +00:00
xmlFree ( buf ) ;
1999-07-07 04:19:20 +00:00
}
/**
 * htmlDocDump:
 * @f:  the FILE *
 * @cur:  the document
 *
 * Serialize an HTML document into a temporary buffer, pass that buffer to
 * xmlBufferDump() for @f and free it.  A NULL document is ignored (with a
 * diagnostic on stderr when DEBUG_TREE is defined); a failure to create the
 * temporary buffer is silent.
 */
void
htmlDocDump(FILE *f, xmlDocPtr cur) {
    xmlBufferPtr scratch;

    if (cur != NULL) {
        scratch = xmlBufferCreate();
        if (scratch != NULL) {
            htmlDocContentDump(scratch, cur);
            xmlBufferDump(f, scratch);
            xmlBufferFree(scratch);
        }
        return;
    }

#ifdef DEBUG_TREE
    /* NOTE(review): literal reproduced verbatim from the text under review;
     * the blanks around it look like an extraction artifact - confirm
     * against the upstream file. */
    fprintf(stderr, " htmlDocDump : document == NULL \n ");
#endif
}
/**
 * htmlSaveFile:
 * @filename:  the filename
 * @cur:  the document
 *
 * Dump an HTML document to a file.  The document is first serialized to a
 * temporary buffer; the file is only opened (mode "w") once that is done.
 *
 * returns: the number of bytes written, or -1 in case of failure (NULL
 * argument, buffer allocation failure, or the file cannot be opened or
 * closed).
 */
int
htmlSaveFile(const char *filename, xmlDocPtr cur) {
    xmlBufferPtr buf;
    FILE *output;
    int ret;

    if ((filename == NULL) || (cur == NULL))
        return(-1);

    /*
     * Save the content to a temp buffer.
     */
    buf = xmlBufferCreate();
    if (buf == NULL)
        return(-1);
    htmlDocContentDump(buf, cur);

    output = fopen(filename, "w");
    if (output == NULL) {
        /* Do not leak the serialized document when the open fails. */
        xmlBufferFree(buf);
        return(-1);
    }

    ret = xmlBufferDump(output, buf);
    xmlBufferFree(buf);

    /* fclose() flushes; a failure here means the data may not be on disk. */
    if (fclose(output) != 0)
        return(-1);

    return((int) (ret * sizeof(xmlChar)));
}
2000-04-03 19:48:13 +00:00
# endif /* LIBXML_HTML_ENABLED */