1999-07-05 16:50:46 +00:00
/*
* testHTML . c : a small tester program for HTML input .
*
* See Copyright for the status of this software .
*
2001-06-24 12:13:24 +00:00
* daniel @ veillard . com
1999-07-05 16:50:46 +00:00
*/
2001-04-21 16:57:29 +00:00
# include "libxml.h"
# ifdef LIBXML_HTML_ENABLED
1999-09-22 09:46:25 +00:00
# include <string.h>
1999-10-14 09:10:25 +00:00
# include <stdarg.h>
1999-09-22 09:46:25 +00:00
# ifdef HAVE_SYS_TYPES_H
1999-07-05 16:50:46 +00:00
# include <sys/types.h>
1999-09-22 09:46:25 +00:00
# endif
1999-07-05 16:50:46 +00:00
# ifdef HAVE_SYS_STAT_H
# include <sys/stat.h>
# endif
# ifdef HAVE_FCNTL_H
# include <fcntl.h>
# endif
# ifdef HAVE_UNISTD_H
# include <unistd.h>
# endif
1999-09-22 09:46:25 +00:00
# ifdef HAVE_STDLIB_H
1999-07-05 16:50:46 +00:00
# include <stdlib.h>
1999-09-22 09:46:25 +00:00
# endif
1999-07-05 16:50:46 +00:00
2000-04-03 19:48:13 +00:00
# include <libxml/xmlmemory.h>
# include <libxml/HTMLparser.h>
# include <libxml/HTMLtree.h>
# include <libxml/debugXML.h>
2000-10-25 19:56:55 +00:00
# include <libxml/xmlerror.h>
2001-10-17 15:58:35 +00:00
# include <libxml/globals.h>
1999-07-05 16:50:46 +00:00
2000-04-03 19:48:13 +00:00
/*
 * Command-line state. Each counter is incremented by main() when the
 * matching "-name" / "--name" switch is seen; zero means "not requested".
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;		/* dump with xmlDebugDumpDocument() instead of serializing */
#endif
static int copy = 0;		/* pass the tree through xmlCopyDoc() before printing */
static int sax = 0;		/* run parseSAXFile() rather than parseAndPrintFile() */
static int repeat = 0;		/* when set, every file is processed 100 * repeat times */
static int noout = 0;		/* skip the dump (tree mode) or the verbose pass (SAX mode) */
static int push = 0;		/* feed the parser through the push interface */
static char *encoding = NULL;	/* argument of -encode, handed to htmlSaveFileEnc() */
1999-07-05 16:50:46 +00:00
1999-10-14 09:10:25 +00:00
/*
 * SAX handler block with every callback left unset. parseSAXFile() uses it
 * for its first, silent pass over each file.
 */
xmlSAXHandler emptySAXHandlerStruct = {
    NULL, /* internalSubset */
    NULL, /* isStandalone */
    NULL, /* hasInternalSubset */
    NULL, /* hasExternalSubset */
    NULL, /* resolveEntity */
    NULL, /* getEntity */
    NULL, /* entityDecl */
    NULL, /* notationDecl */
    NULL, /* attributeDecl */
    NULL, /* elementDecl */
    NULL, /* unparsedEntityDecl */
    NULL, /* setDocumentLocator */
    NULL, /* startDocument */
    NULL, /* endDocument */
    NULL, /* startElement */
    NULL, /* endElement */
    NULL, /* reference */
    NULL, /* characters */
    NULL, /* ignorableWhitespace */
    NULL, /* processingInstruction */
    NULL, /* comment */
    NULL, /* warning */
    NULL, /* error */
    NULL, /* fatalError */
    NULL, /* getParameterEntity */
    NULL, /* cdataBlock */
    NULL, /* externalSubset */
    1     /* NOTE(review): presumably the "initialized" marker - confirm against xmlSAXHandler */
};

xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;

/* Defined further down, next to debugSAXHandlerStruct. */
extern xmlSAXHandlerPtr debugSAXHandler;
/************************************************************************
* *
* Debug Handlers *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* isStandaloneDebug :
* @ ctxt : An XML parser context
*
* Is this document tagged standalone ?
*
* Returns 1 if true
*/
2001-03-24 17:00:36 +00:00
static int
2001-03-26 16:28:29 +00:00
isStandaloneDebug ( void * ctx ATTRIBUTE_UNUSED )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.isStandalone() \n " ) ;
return ( 0 ) ;
}
/**
* hasInternalSubsetDebug :
* @ ctxt : An XML parser context
*
* Does this document has an internal subset
*
* Returns 1 if true
*/
2001-03-24 17:00:36 +00:00
static int
2001-03-26 16:28:29 +00:00
hasInternalSubsetDebug ( void * ctx ATTRIBUTE_UNUSED )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.hasInternalSubset() \n " ) ;
return ( 0 ) ;
}
/**
* hasExternalSubsetDebug :
* @ ctxt : An XML parser context
*
* Does this document has an external subset
*
* Returns 1 if true
*/
2001-03-24 17:00:36 +00:00
static int
2001-03-26 16:28:29 +00:00
hasExternalSubsetDebug ( void * ctx ATTRIBUTE_UNUSED )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.hasExternalSubset() \n " ) ;
return ( 0 ) ;
}
/**
* hasInternalSubsetDebug :
* @ ctxt : An XML parser context
*
* Does this document has an internal subset
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
internalSubsetDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * name ,
1999-10-14 09:10:25 +00:00
const xmlChar * ExternalID , const xmlChar * SystemID )
{
2000-08-17 13:50:51 +00:00
fprintf ( stdout , " SAX.internalSubset(%s, " , name ) ;
if ( ExternalID = = NULL )
fprintf ( stdout , " , " ) ;
else
fprintf ( stdout , " %s, " , ExternalID ) ;
if ( SystemID = = NULL )
fprintf ( stdout , " ) \n " ) ;
else
fprintf ( stdout , " %s) \n " , SystemID ) ;
1999-10-14 09:10:25 +00:00
}
/**
* resolveEntityDebug :
* @ ctxt : An XML parser context
* @ publicId : The public ID of the entity
* @ systemId : The system ID of the entity
*
* Special entity resolver , better left to the parser , it has
* more context than the application layer .
* The default behaviour is to NOT resolve the entities , in that case
* the ENTITY_REF nodes are built in the structure ( and the parameter
* values ) .
*
* Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour .
*/
2001-03-24 17:00:36 +00:00
static xmlParserInputPtr
2001-03-26 16:28:29 +00:00
resolveEntityDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * publicId , const xmlChar * systemId )
1999-10-14 09:10:25 +00:00
{
/* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
fprintf ( stdout , " SAX.resolveEntity( " ) ;
if ( publicId ! = NULL )
fprintf ( stdout , " %s " , ( char * ) publicId ) ;
else
fprintf ( stdout , " " ) ;
if ( systemId ! = NULL )
fprintf ( stdout , " , %s) \n " , ( char * ) systemId ) ;
else
fprintf ( stdout , " , ) \n " ) ;
/*********
if ( systemId ! = NULL ) {
return ( xmlNewInputFromFile ( ctxt , ( char * ) systemId ) ) ;
}
* * * * * * * * */
return ( NULL ) ;
}
/**
* getEntityDebug :
* @ ctxt : An XML parser context
* @ name : The entity name
*
* Get an entity by name
*
* Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour .
*/
2001-03-24 17:00:36 +00:00
static xmlEntityPtr
2001-03-26 16:28:29 +00:00
getEntityDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * name )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.getEntity(%s) \n " , name ) ;
return ( NULL ) ;
}
/**
* getParameterEntityDebug :
* @ ctxt : An XML parser context
* @ name : The entity name
*
* Get a parameter entity by name
*
* Returns the xmlParserInputPtr
*/
2001-03-24 17:00:36 +00:00
static xmlEntityPtr
2001-03-26 16:28:29 +00:00
getParameterEntityDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * name )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.getParameterEntity(%s) \n " , name ) ;
return ( NULL ) ;
}
/**
* entityDeclDebug :
* @ ctxt : An XML parser context
* @ name : the entity name
* @ type : the entity type
* @ publicId : The public ID of the entity
* @ systemId : The system ID of the entity
* @ content : the entity value ( without processing ) .
*
* An entity definition has been parsed
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
entityDeclDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * name , int type ,
1999-10-14 09:10:25 +00:00
const xmlChar * publicId , const xmlChar * systemId , xmlChar * content )
{
fprintf ( stdout , " SAX.entityDecl(%s, %d, %s, %s, %s) \n " ,
name , type , publicId , systemId , content ) ;
}
/**
* attributeDeclDebug :
* @ ctxt : An XML parser context
* @ name : the attribute name
* @ type : the attribute type
*
* An attribute definition has been parsed
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
attributeDeclDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * elem , const xmlChar * name ,
1999-10-14 09:10:25 +00:00
int type , int def , const xmlChar * defaultValue ,
2001-03-26 16:28:29 +00:00
xmlEnumerationPtr tree ATTRIBUTE_UNUSED )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.attributeDecl(%s, %s, %d, %d, %s, ...) \n " ,
elem , name , type , def , defaultValue ) ;
}
/**
* elementDeclDebug :
* @ ctxt : An XML parser context
* @ name : the element name
* @ type : the element type
* @ content : the element value ( without processing ) .
*
* An element definition has been parsed
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
elementDeclDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * name , int type ,
xmlElementContentPtr content ATTRIBUTE_UNUSED )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.elementDecl(%s, %d, ...) \n " ,
name , type ) ;
}
/**
* notationDeclDebug :
* @ ctxt : An XML parser context
* @ name : The name of the notation
* @ publicId : The public ID of the entity
* @ systemId : The system ID of the entity
*
* What to do when a notation declaration has been parsed .
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
notationDeclDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * name ,
1999-10-14 09:10:25 +00:00
const xmlChar * publicId , const xmlChar * systemId )
{
fprintf ( stdout , " SAX.notationDecl(%s, %s, %s) \n " ,
( char * ) name , ( char * ) publicId , ( char * ) systemId ) ;
}
/**
* unparsedEntityDeclDebug :
* @ ctxt : An XML parser context
* @ name : The name of the entity
* @ publicId : The public ID of the entity
* @ systemId : The system ID of the entity
* @ notationName : the name of the notation
*
* What to do when an unparsed entity declaration is parsed
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
unparsedEntityDeclDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * name ,
1999-10-14 09:10:25 +00:00
const xmlChar * publicId , const xmlChar * systemId ,
const xmlChar * notationName )
{
fprintf ( stdout , " SAX.unparsedEntityDecl(%s, %s, %s, %s) \n " ,
( char * ) name , ( char * ) publicId , ( char * ) systemId ,
( char * ) notationName ) ;
}
/**
* setDocumentLocatorDebug :
* @ ctxt : An XML parser context
* @ loc : A SAX Locator
*
* Receive the document locator at startup , actually xmlDefaultSAXLocator
* Everything is available on the context , so this is useless in our case .
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
setDocumentLocatorDebug ( void * ctx ATTRIBUTE_UNUSED , xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.setDocumentLocator() \n " ) ;
}
/**
* startDocumentDebug :
* @ ctxt : An XML parser context
*
* called when the document start being processed .
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
startDocumentDebug ( void * ctx ATTRIBUTE_UNUSED )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.startDocument() \n " ) ;
}
/**
* endDocumentDebug :
* @ ctxt : An XML parser context
*
* called when the document end has been detected .
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
endDocumentDebug ( void * ctx ATTRIBUTE_UNUSED )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.endDocument() \n " ) ;
}
/**
* startElementDebug :
* @ ctxt : An XML parser context
* @ name : The element name
*
* called when an opening tag has been processed .
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
startElementDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * name , const xmlChar * * atts )
1999-10-14 09:10:25 +00:00
{
int i ;
fprintf ( stdout , " SAX.startElement(%s " , ( char * ) name ) ;
if ( atts ! = NULL ) {
for ( i = 0 ; ( atts [ i ] ! = NULL ) ; i + + ) {
2000-08-17 13:50:51 +00:00
fprintf ( stdout , " , %s " , atts [ i + + ] ) ;
2000-08-28 10:04:51 +00:00
if ( atts [ i ] ! = NULL ) {
unsigned char output [ 40 ] ;
const unsigned char * att = atts [ i ] ;
int outlen , attlen ;
fprintf ( stdout , " =' " ) ;
while ( ( attlen = strlen ( ( char * ) att ) ) > 0 ) {
outlen = sizeof output - 1 ;
htmlEncodeEntities ( output , & outlen , att , & attlen , ' \' ' ) ;
fprintf ( stdout , " %.*s " , outlen , output ) ;
att + = attlen ;
}
fprintf ( stdout , " ' " ) ;
}
1999-10-14 09:10:25 +00:00
}
}
fprintf ( stdout , " ) \n " ) ;
}
/**
* endElementDebug :
* @ ctxt : An XML parser context
* @ name : The element name
*
* called when the end of an element has been detected .
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
endElementDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * name )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.endElement(%s) \n " , ( char * ) name ) ;
}
/**
* charactersDebug :
* @ ctxt : An XML parser context
* @ ch : a xmlChar string
* @ len : the number of xmlChar
*
* receiving some chars from the parser .
* Question : how much at a time ? ? ?
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
charactersDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * ch , int len )
1999-10-14 09:10:25 +00:00
{
2000-08-28 10:04:51 +00:00
unsigned char output [ 40 ] ;
2000-08-29 09:41:15 +00:00
int inlen = len , outlen = 30 ;
1999-10-14 09:10:25 +00:00
2000-08-29 09:41:15 +00:00
htmlEncodeEntities ( output , & outlen , ch , & inlen , 0 ) ;
2000-08-28 10:04:51 +00:00
output [ outlen ] = 0 ;
2000-08-12 21:12:04 +00:00
fprintf ( stdout , " SAX.characters(%s, %d) \n " , output , len ) ;
1999-10-14 09:10:25 +00:00
}
2000-10-14 23:38:43 +00:00
/**
* cdataDebug :
* @ ctxt : An XML parser context
* @ ch : a xmlChar string
* @ len : the number of xmlChar
*
* receiving some cdata chars from the parser .
* Question : how much at a time ? ? ?
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
cdataDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * ch , int len )
2000-10-14 23:38:43 +00:00
{
unsigned char output [ 40 ] ;
int inlen = len , outlen = 30 ;
htmlEncodeEntities ( output , & outlen , ch , & inlen , 0 ) ;
output [ outlen ] = 0 ;
fprintf ( stdout , " SAX.cdata(%s, %d) \n " , output , len ) ;
}
1999-10-14 09:10:25 +00:00
/**
* referenceDebug :
* @ ctxt : An XML parser context
* @ name : The entity name
*
* called when an entity reference is detected .
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
referenceDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * name )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.reference(%s) \n " , name ) ;
}
/**
* ignorableWhitespaceDebug :
* @ ctxt : An XML parser context
* @ ch : a xmlChar string
* @ start : the first char in the string
* @ len : the number of xmlChar
*
* receiving some ignorable whitespaces from the parser .
* Question : how much at a time ? ? ?
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
ignorableWhitespaceDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * ch , int len )
1999-10-14 09:10:25 +00:00
{
2000-08-12 21:12:04 +00:00
char output [ 40 ] ;
int i ;
for ( i = 0 ; ( i < len ) & & ( i < 30 ) ; i + + )
output [ i ] = ch [ i ] ;
output [ i ] = 0 ;
fprintf ( stdout , " SAX.ignorableWhitespace(%s, %d) \n " , output , len ) ;
1999-10-14 09:10:25 +00:00
}
/**
* processingInstructionDebug :
* @ ctxt : An XML parser context
* @ target : the target name
* @ data : the PI data ' s
* @ len : the number of xmlChar
*
* A processing instruction has been parsed .
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
processingInstructionDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * target ,
1999-10-14 09:10:25 +00:00
const xmlChar * data )
{
fprintf ( stdout , " SAX.processingInstruction(%s, %s) \n " ,
( char * ) target , ( char * ) data ) ;
}
/**
* commentDebug :
* @ ctxt : An XML parser context
* @ value : the comment content
*
* A comment has been parsed .
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
commentDebug ( void * ctx ATTRIBUTE_UNUSED , const xmlChar * value )
1999-10-14 09:10:25 +00:00
{
fprintf ( stdout , " SAX.comment(%s) \n " , value ) ;
}
/**
* warningDebug :
* @ ctxt : An XML parser context
* @ msg : the message to display / transmit
* @ . . . : extra parameters for the message display
*
* Display and format a warning messages , gives file , line , position and
* extra parameters .
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
warningDebug ( void * ctx ATTRIBUTE_UNUSED , const char * msg , . . . )
1999-10-14 09:10:25 +00:00
{
va_list args ;
va_start ( args , msg ) ;
fprintf ( stdout , " SAX.warning: " ) ;
vfprintf ( stdout , msg , args ) ;
va_end ( args ) ;
}
/**
* errorDebug :
* @ ctxt : An XML parser context
* @ msg : the message to display / transmit
* @ . . . : extra parameters for the message display
*
* Display and format a error messages , gives file , line , position and
* extra parameters .
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
errorDebug ( void * ctx ATTRIBUTE_UNUSED , const char * msg , . . . )
1999-10-14 09:10:25 +00:00
{
va_list args ;
va_start ( args , msg ) ;
fprintf ( stdout , " SAX.error: " ) ;
vfprintf ( stdout , msg , args ) ;
va_end ( args ) ;
}
/**
* fatalErrorDebug :
* @ ctxt : An XML parser context
* @ msg : the message to display / transmit
* @ . . . : extra parameters for the message display
*
* Display and format a fatalError messages , gives file , line , position and
* extra parameters .
*/
2001-03-24 17:00:36 +00:00
static void
2001-03-26 16:28:29 +00:00
fatalErrorDebug ( void * ctx ATTRIBUTE_UNUSED , const char * msg , . . . )
1999-10-14 09:10:25 +00:00
{
va_list args ;
va_start ( args , msg ) ;
fprintf ( stdout , " SAX.fatalError: " ) ;
vfprintf ( stdout , msg , args ) ;
va_end ( args ) ;
}
/*
 * SAX handler block wired to the *Debug tracing callbacks above.
 * parseSAXFile() uses it for the verbose pass. Entries are positional and
 * follow the same slot order as emptySAXHandlerStruct.
 */
xmlSAXHandler debugSAXHandlerStruct = {
    internalSubsetDebug,        /* internalSubset */
    isStandaloneDebug,          /* isStandalone */
    hasInternalSubsetDebug,     /* hasInternalSubset */
    hasExternalSubsetDebug,     /* hasExternalSubset */
    resolveEntityDebug,         /* resolveEntity */
    getEntityDebug,             /* getEntity */
    entityDeclDebug,            /* entityDecl */
    notationDeclDebug,          /* notationDecl */
    attributeDeclDebug,         /* attributeDecl */
    elementDeclDebug,           /* elementDecl */
    unparsedEntityDeclDebug,    /* unparsedEntityDecl */
    setDocumentLocatorDebug,    /* setDocumentLocator */
    startDocumentDebug,         /* startDocument */
    endDocumentDebug,           /* endDocument */
    startElementDebug,          /* startElement */
    endElementDebug,            /* endElement */
    referenceDebug,             /* reference */
    charactersDebug,            /* characters */
    ignorableWhitespaceDebug,   /* ignorableWhitespace */
    processingInstructionDebug, /* processingInstruction */
    commentDebug,               /* comment */
    warningDebug,               /* warning */
    errorDebug,                 /* error */
    fatalErrorDebug,            /* fatalError */
    getParameterEntityDebug,    /* getParameterEntity */
    cdataDebug,                 /* cdataBlock */
    NULL,                       /* externalSubset: no tracing callback installed */
    1                           /* NOTE(review): presumably the "initialized" marker - confirm */
};

xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
1999-07-05 16:50:46 +00:00
/************************************************************************
* *
* Debug *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-03-24 17:00:36 +00:00
static void
parseSAXFile ( char * filename ) {
2000-08-26 21:40:43 +00:00
htmlDocPtr doc = NULL ;
1999-10-14 09:10:25 +00:00
/*
* Empty callbacks for checking
*/
2000-08-12 21:12:04 +00:00
if ( push ) {
FILE * f ;
f = fopen ( filename , " r " ) ;
if ( f ! = NULL ) {
int res , size = 3 ;
char chars [ 4096 ] ;
htmlParserCtxtPtr ctxt ;
1999-10-14 09:10:25 +00:00
2000-08-12 21:12:04 +00:00
/* if (repeat) */
size = 4096 ;
res = fread ( chars , 1 , 4 , f ) ;
if ( res > 0 ) {
ctxt = htmlCreatePushParserCtxt ( emptySAXHandler , NULL ,
chars , res , filename , 0 ) ;
while ( ( res = fread ( chars , 1 , size , f ) ) > 0 ) {
htmlParseChunk ( ctxt , chars , res , 0 ) ;
}
htmlParseChunk ( ctxt , chars , 0 , 1 ) ;
doc = ctxt - > myDoc ;
htmlFreeParserCtxt ( ctxt ) ;
}
if ( doc ! = NULL ) {
fprintf ( stdout , " htmlSAXParseFile returned non-NULL \n " ) ;
xmlFreeDoc ( doc ) ;
}
fclose ( f ) ;
}
if ( ! noout ) {
f = fopen ( filename , " r " ) ;
if ( f ! = NULL ) {
int res , size = 3 ;
char chars [ 4096 ] ;
htmlParserCtxtPtr ctxt ;
/* if (repeat) */
size = 4096 ;
res = fread ( chars , 1 , 4 , f ) ;
if ( res > 0 ) {
ctxt = htmlCreatePushParserCtxt ( debugSAXHandler , NULL ,
chars , res , filename , 0 ) ;
while ( ( res = fread ( chars , 1 , size , f ) ) > 0 ) {
htmlParseChunk ( ctxt , chars , res , 0 ) ;
}
htmlParseChunk ( ctxt , chars , 0 , 1 ) ;
doc = ctxt - > myDoc ;
htmlFreeParserCtxt ( ctxt ) ;
}
if ( doc ! = NULL ) {
fprintf ( stdout , " htmlSAXParseFile returned non-NULL \n " ) ;
xmlFreeDoc ( doc ) ;
}
fclose ( f ) ;
}
}
} else {
doc = htmlSAXParseFile ( filename , NULL , emptySAXHandler , NULL ) ;
1999-10-14 09:10:25 +00:00
if ( doc ! = NULL ) {
fprintf ( stdout , " htmlSAXParseFile returned non-NULL \n " ) ;
xmlFreeDoc ( doc ) ;
}
2000-08-12 21:12:04 +00:00
if ( ! noout ) {
/*
* Debug callback
*/
doc = htmlSAXParseFile ( filename , NULL , debugSAXHandler , NULL ) ;
if ( doc ! = NULL ) {
fprintf ( stdout , " htmlSAXParseFile returned non-NULL \n " ) ;
xmlFreeDoc ( doc ) ;
}
}
1999-10-14 09:10:25 +00:00
}
}
2001-03-24 17:00:36 +00:00
static void
parseAndPrintFile ( char * filename ) {
2000-01-09 21:08:56 +00:00
htmlDocPtr doc = NULL , tmp ;
1999-07-05 16:50:46 +00:00
/*
* build an HTML tree from a string ;
*/
1999-12-29 12:49:06 +00:00
if ( push ) {
FILE * f ;
f = fopen ( filename , " r " ) ;
if ( f ! = NULL ) {
int res , size = 3 ;
2000-08-12 21:12:04 +00:00
char chars [ 4096 ] ;
1999-12-29 12:49:06 +00:00
htmlParserCtxtPtr ctxt ;
2000-08-12 21:12:04 +00:00
/* if (repeat) */
size = 4096 ;
1999-12-29 12:49:06 +00:00
res = fread ( chars , 1 , 4 , f ) ;
if ( res > 0 ) {
ctxt = htmlCreatePushParserCtxt ( NULL , NULL ,
chars , res , filename , 0 ) ;
while ( ( res = fread ( chars , 1 , size , f ) ) > 0 ) {
htmlParseChunk ( ctxt , chars , res , 0 ) ;
}
htmlParseChunk ( ctxt , chars , 0 , 1 ) ;
doc = ctxt - > myDoc ;
htmlFreeParserCtxt ( ctxt ) ;
}
2000-08-12 21:12:04 +00:00
fclose ( f ) ;
1999-12-29 12:49:06 +00:00
}
} else {
doc = htmlParseFile ( filename , NULL ) ;
}
if ( doc = = NULL ) {
2000-10-25 19:56:55 +00:00
xmlGenericError ( xmlGenericErrorContext ,
" Could not parse %s \n " , filename ) ;
1999-12-29 12:49:06 +00:00
}
1999-07-05 16:50:46 +00:00
/*
* test intermediate copy if needed .
*/
if ( copy ) {
tmp = doc ;
doc = xmlCopyDoc ( doc , 1 ) ;
xmlFreeDoc ( tmp ) ;
}
/*
* print it .
*/
1999-10-14 09:10:25 +00:00
if ( ! noout ) {
2000-04-03 19:48:13 +00:00
# ifdef LIBXML_DEBUG_ENABLED
2000-07-14 14:49:25 +00:00
if ( ! debug ) {
if ( encoding )
htmlSaveFileEnc ( " - " , doc , encoding ) ;
else
htmlDocDump ( stdout , doc ) ;
} else
1999-10-14 09:10:25 +00:00
xmlDebugDumpDocument ( stdout , doc ) ;
2000-04-03 19:48:13 +00:00
# else
2000-07-14 14:49:25 +00:00
if ( encoding )
htmlSaveFileEnc ( " - " , doc , encoding ) ;
else
htmlDocDump ( stdout , doc ) ;
2000-04-03 19:48:13 +00:00
# endif
1999-10-14 09:10:25 +00:00
}
1999-07-05 16:50:46 +00:00
/*
* free it .
*/
xmlFreeDoc ( doc ) ;
}
int main ( int argc , char * * argv ) {
1999-10-14 09:10:25 +00:00
int i , count ;
1999-07-05 16:50:46 +00:00
int files = 0 ;
for ( i = 1 ; i < argc ; i + + ) {
2000-04-03 19:48:13 +00:00
# ifdef LIBXML_DEBUG_ENABLED
1999-07-05 16:50:46 +00:00
if ( ( ! strcmp ( argv [ i ] , " -debug " ) ) | | ( ! strcmp ( argv [ i ] , " --debug " ) ) )
debug + + ;
2000-04-03 19:48:13 +00:00
else
# endif
if ( ( ! strcmp ( argv [ i ] , " -copy " ) ) | | ( ! strcmp ( argv [ i ] , " --copy " ) ) )
1999-07-05 16:50:46 +00:00
copy + + ;
1999-12-29 12:49:06 +00:00
else if ( ( ! strcmp ( argv [ i ] , " -push " ) ) | | ( ! strcmp ( argv [ i ] , " --push " ) ) )
push + + ;
1999-10-14 09:10:25 +00:00
else if ( ( ! strcmp ( argv [ i ] , " -sax " ) ) | | ( ! strcmp ( argv [ i ] , " --sax " ) ) )
sax + + ;
else if ( ( ! strcmp ( argv [ i ] , " -noout " ) ) | | ( ! strcmp ( argv [ i ] , " --noout " ) ) )
noout + + ;
else if ( ( ! strcmp ( argv [ i ] , " -repeat " ) ) | |
( ! strcmp ( argv [ i ] , " --repeat " ) ) )
repeat + + ;
2000-07-14 14:49:25 +00:00
else if ( ( ! strcmp ( argv [ i ] , " -encode " ) ) | |
( ! strcmp ( argv [ i ] , " --encode " ) ) ) {
i + + ;
encoding = argv [ i ] ;
}
1999-07-05 16:50:46 +00:00
}
for ( i = 1 ; i < argc ; i + + ) {
2000-07-14 14:49:25 +00:00
if ( ( ! strcmp ( argv [ i ] , " -encode " ) ) | |
( ! strcmp ( argv [ i ] , " --encode " ) ) ) {
i + + ;
continue ;
}
1999-07-05 16:50:46 +00:00
if ( argv [ i ] [ 0 ] ! = ' - ' ) {
1999-10-14 09:10:25 +00:00
if ( repeat ) {
for ( count = 0 ; count < 100 * repeat ; count + + ) {
if ( sax )
parseSAXFile ( argv [ i ] ) ;
else
parseAndPrintFile ( argv [ i ] ) ;
}
} else {
if ( sax )
parseSAXFile ( argv [ i ] ) ;
else
parseAndPrintFile ( argv [ i ] ) ;
}
1999-07-05 16:50:46 +00:00
files + + ;
}
}
if ( files = = 0 ) {
1999-10-14 09:10:25 +00:00
printf ( " Usage : %s [--debug] [--copy] [--copy] HTMLfiles ... \n " ,
1999-07-05 16:50:46 +00:00
argv [ 0 ] ) ;
printf ( " \t Parse the HTML files and output the result of the parsing \n " ) ;
2000-04-03 19:48:13 +00:00
# ifdef LIBXML_DEBUG_ENABLED
1999-07-05 16:50:46 +00:00
printf ( " \t --debug : dump a debug tree of the in-memory document \n " ) ;
2000-04-03 19:48:13 +00:00
# endif
1999-07-05 16:50:46 +00:00
printf ( " \t --copy : used to test the internal copy implementation \n " ) ;
1999-10-14 09:10:25 +00:00
printf ( " \t --sax : debug the sequence of SAX callbacks \n " ) ;
1999-12-29 12:49:06 +00:00
printf ( " \t --repeat : parse the file 100 times, for timing \n " ) ;
1999-10-14 09:10:25 +00:00
printf ( " \t --noout : do not print the result \n " ) ;
1999-12-29 12:49:06 +00:00
printf ( " \t --push : use the push mode parser \n " ) ;
2000-07-14 14:49:25 +00:00
printf ( " \t --encode encoding : output in the given encoding \n " ) ;
1999-07-05 16:50:46 +00:00
}
1999-12-01 09:51:45 +00:00
xmlCleanupParser ( ) ;
1999-10-14 09:10:25 +00:00
xmlMemoryDump ( ) ;
1999-07-05 16:50:46 +00:00
return ( 0 ) ;
}
2000-04-03 19:48:13 +00:00
# else /* !LIBXML_HTML_ENABLED */
# include <stdio.h>
/*
 * Stand-in entry point used when HTML support is compiled out: prints a
 * one-line notice naming the program and exits with status 0.
 */
int main(int argc, char **argv) {
    const char *prog = argv[0];

    (void) argc;
    fprintf(stdout, "%s : HTML support not compiled in\n", prog);
    return 0;
}
# endif