1999-07-05 20:50:46 +04:00
/*
* HTMLparser.h : interface for an HTML 4.0 non-verifying parser
*
* See Copyright for the status of this software .
*
* Daniel . Veillard @ w3 . org
*/
# ifndef __HTML_PARSER_H__
# define __HTML_PARSER_H__
# include "parser.h"
1999-12-18 18:32:46 +03:00
/*
 * Give every declaration in this header C linkage when it is included
 * from a C++ translation unit.  The language string must be exactly "C";
 * the matching closing brace is at the end of the header.
 */
#ifdef __cplusplus
extern "C" {
#endif
1999-07-07 02:25:25 +04:00
/*
* Most of the back - end structures from XML and HTML are shared
*/
1999-07-05 20:50:46 +04:00
/*
 * HTML-side spellings of the XML parser types.  Each html* name below is a
 * plain typedef alias of the xml* type on its left; no new type is created.
 */
typedef xmlParserCtxt      htmlParserCtxt;
typedef xmlParserCtxtPtr   htmlParserCtxtPtr;
typedef xmlParserNodeInfo  htmlParserNodeInfo;
typedef xmlSAXHandler      htmlSAXHandler;
typedef xmlSAXHandlerPtr   htmlSAXHandlerPtr;
typedef xmlParserInput     htmlParserInput;
typedef xmlParserInputPtr  htmlParserInputPtr;
typedef xmlDocPtr          htmlDocPtr;
typedef xmlNodePtr         htmlNodePtr;
1999-07-07 02:25:25 +04:00
/*
* Internal description of an HTML element
*/
2000-01-05 17:46:17 +03:00
/*
 * One record describing a single HTML element.
 * htmlElemDescPtr is the pointer form of htmlElemDesc.
 * Field order is part of the layout and must not be changed.
 */
typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;
struct _htmlElemDesc {
    const char *name;   /* The tag name */
    int startTag;       /* Whether the start tag can be implied */
    int endTag;         /* Whether the end tag can be implied */
    int empty;          /* Is this an empty element ? */
    int depr;           /* Is this a deprecated element ? */
    int dtd;            /* 1: only in Loose DTD, 2: only Frameset one */
    const char *desc;   /* the description */
};
1999-07-07 02:25:25 +04:00
/*
* Internal description of an HTML entity
*/
2000-01-05 17:46:17 +03:00
/*
 * One record describing a single HTML entity.
 * htmlEntityDescPtr is the pointer form of htmlEntityDesc.
 * Field order is part of the layout and must not be changed.
 */
typedef struct _htmlEntityDesc htmlEntityDesc;
typedef htmlEntityDesc *htmlEntityDescPtr;
struct _htmlEntityDesc {
    int value;          /* the UNICODE value for the character */
    const char *name;   /* The entity name */
    const char *desc;   /* the description */
};
1999-07-07 02:25:25 +04:00
/*
* There are only a few public functions.
*/
1999-12-21 18:35:29 +03:00
/*
 * Name-keyed lookups returning an element or entity description record.
 * Only the prototypes are visible in this header; the result for an
 * unknown name is not stated here (presumably NULL - confirm against the
 * implementation).
 */
htmlElemDescPtr   htmlTagLookup(const xmlChar *tag);
htmlEntityDescPtr htmlEntityLookup(const xmlChar *name);
1999-07-07 11:32:15 +04:00
1999-12-21 18:35:29 +03:00
/*
 * Auto-close queries on an existing document/node pair.
 * NOTE(review): the meaning of the int results is not visible from this
 * header - check the implementation before relying on specific values.
 */
int htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem);
int htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem);

/*
 * Lower-level parsing routines that operate on a parser context.
 * htmlParseEntityRef also takes an xmlChar ** out-parameter (str); what is
 * stored through it is defined by the implementation, not by this header.
 */
htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt, xmlChar **str);
int               htmlParseCharRef(htmlParserCtxtPtr ctxt);
void              htmlParseElement(htmlParserCtxtPtr ctxt);
1999-07-05 20:50:46 +04:00
1999-12-21 18:35:29 +03:00
/*
 * Whole-document entry points.  The *Doc variants take the input as an
 * xmlChar buffer (cur), the *File variants take a file name.  The SAX
 * variants additionally accept a handler table and an opaque userData
 * pointer.  All four return an htmlDocPtr; failure behaviour is not
 * visible from this header.
 */
htmlDocPtr htmlSAXParseDoc(xmlChar *cur, const char *encoding,
                           htmlSAXHandlerPtr sax, void *userData);
htmlDocPtr htmlParseDoc(xmlChar *cur, const char *encoding);
htmlDocPtr htmlSAXParseFile(const char *filename, const char *encoding,
                            htmlSAXHandlerPtr sax, void *userData);
htmlDocPtr htmlParseFile(const char *filename, const char *encoding);
1999-07-05 20:50:46 +04:00
1999-12-29 15:49:06 +03:00
/**
* Interfaces for the Push mode
*/
/* Release a parser context. */
void htmlFreeParserCtxt(htmlParserCtxtPtr ctxt);

/*
 * Create a parser context for push-mode parsing.  Takes an optional-looking
 * SAX handler and user data pointer, a first chunk of input with its size,
 * a file name and an encoding.
 * NOTE(review): which arguments may be NULL/0 is not visible here - confirm.
 */
htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
                                           void *user_data,
                                           const char *chunk, int size,
                                           const char *filename,
                                           xmlCharEncoding enc);

/*
 * Feed `size` bytes from `chunk` to a push-mode context; `terminate` is a
 * flag supplied by the caller (presumably non-zero on the last chunk -
 * confirm against the implementation).
 */
int htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
                   int terminate);
1999-12-18 18:32:46 +03:00
# ifdef __cplusplus
} /* end of the C-linkage block opened under the same __cplusplus guard above */
# endif
1999-07-05 20:50:46 +04:00
# endif /* __HTML_PARSER_H__ */