1998-08-13 07:39:55 +04:00
/*
 * parser.h : Interfaces, constants and types related to the XML parser.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
# ifndef __XML_PARSER_H__
# define __XML_PARSER_H__
# include "tree.h"
1999-08-10 23:04:08 +04:00
# include "valid.h"
1999-06-23 01:49:07 +04:00
# include "xmlIO.h"
1999-12-12 16:03:50 +03:00
# include "entities.h"
1998-08-13 07:39:55 +04:00
#ifdef __cplusplus
/*
 * Give every declaration below C linkage when this header is included
 * from C++.  The linkage string must be exactly "C" (no padding spaces).
 * The matching closing brace sits at the end of this header.
 */
extern "C" {
#endif
/*
 * Constants.
 *
 * XML_DEFAULT_VERSION expands to the string literal "1.0".
 * NOTE(review): presumably the XML version assumed when a document does
 * not declare one -- confirm against the parser sources.
 */
#define XML_DEFAULT_VERSION "1.0"
1999-08-10 23:04:08 +04:00
/**
* an xmlParserInput is an input flow for the XML processor .
* Each entity parsed is associated an xmlParserInput ( except the
* few predefined ones ) . This is the case both for internal entities
* - in which case the flow is already completely in memory - or
* external entities - in which case we use the buf structure for
* progressive reading and I18N conversions to the internal UTF - 8 format .
*/
1999-09-24 02:19:22 +04:00
typedef void ( * xmlParserInputDeallocate ) ( xmlChar * ) ;
2000-01-05 17:46:17 +03:00
typedef struct _xmlParserInput xmlParserInput ;
typedef xmlParserInput * xmlParserInputPtr ;
struct _xmlParserInput {
1999-06-23 01:49:07 +04:00
/* Input buffer */
xmlParserInputBufferPtr buf ; /* UTF-8 encoded buffer */
1998-08-13 07:39:55 +04:00
const char * filename ; /* The file analyzed, if any */
1999-08-10 23:04:08 +04:00
const char * directory ; /* the directory/base of teh file */
1999-12-28 19:35:14 +03:00
const xmlChar * base ; /* Base of the array to parse */
const xmlChar * cur ; /* Current char being parsed */
int length ; /* length if known */
1998-08-13 07:39:55 +04:00
int line ; /* Current line */
int col ; /* Current column */
1999-12-28 19:35:14 +03:00
int consumed ; /* How many xmlChars already consumed */
1999-03-01 00:54:31 +03:00
xmlParserInputDeallocate free ; /* function to deallocate the base */
2000-01-05 17:46:17 +03:00
} ;
1998-08-13 07:39:55 +04:00
1999-08-10 23:04:08 +04:00
/**
 * The parser can be asked to collect node information, i.e. at what
 * place in the file each node was detected.
 * NOTE: This is off by default and not very well tested.
 */
typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;
struct _xmlParserNodeInfo {
    const struct _xmlNode *node;      /* The node this record is about */

    /* Position & line # where the text that created the node begins & ends */
    unsigned long begin_pos;
    unsigned long begin_line;
    unsigned long end_pos;
    unsigned long end_line;
};
1998-08-13 07:39:55 +04:00
2000-01-05 17:46:17 +03:00
typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq ;
typedef xmlParserNodeInfoSeq * xmlParserNodeInfoSeqPtr ;
struct _xmlParserNodeInfoSeq {
1998-08-13 07:39:55 +04:00
unsigned long maximum ;
unsigned long length ;
xmlParserNodeInfo * buffer ;
2000-01-05 17:46:17 +03:00
} ;
1998-08-13 07:39:55 +04:00
1999-08-10 23:04:08 +04:00
/**
 * The parser is now also working as a state based parser.
 * The recursive one uses the state info for entities processing.
 *
 * The numeric values are fixed by declaration order: XML_PARSER_EOF is -1,
 * XML_PARSER_START is 0 and every following state is one more than the
 * previous one, so do not reorder the enumerators.
 */
typedef enum {
    XML_PARSER_EOF = -1,            /* nothing is to be parsed */
    XML_PARSER_START = 0,           /* nothing has been parsed */
    XML_PARSER_MISC,                /* Misc* before int subset */
    XML_PARSER_PI,                  /* Within a processing instruction */
    XML_PARSER_DTD,                 /* within some DTD content */
    XML_PARSER_PROLOG,              /* Misc* after internal subset */
    XML_PARSER_COMMENT,             /* within a comment */
    XML_PARSER_START_TAG,           /* within a start tag */
    XML_PARSER_CONTENT,             /* within the content */
    XML_PARSER_CDATA_SECTION,       /* within a CDATA section */
    XML_PARSER_END_TAG,             /* within a closing tag */
    XML_PARSER_ENTITY_DECL,         /* within an entity declaration */
    XML_PARSER_ENTITY_VALUE,        /* within an entity value in a decl */
    XML_PARSER_ATTRIBUTE_VALUE,     /* within an attribute value */
    XML_PARSER_EPILOG               /* the Misc* after the last end tag */
} xmlParserInputState;
/**
* The parser context .
* NOTE This doesn ' t completely defines the parser state , the ( current ? )
* design of the parser uses recursive function calls since this allow
* and easy mapping from the production rules of the specification
* to the actual code . The drawback is that the actual function call
* also reflect the parser state . However most of the parsing routines
* takes as the only argument the parser context pointer , so migrating
* to a state based parser for progressive parsing shouldn ' t be too hard .
*/
2000-01-05 17:46:17 +03:00
typedef struct _xmlParserCtxt xmlParserCtxt ;
typedef xmlParserCtxt * xmlParserCtxtPtr ;
struct _xmlParserCtxt {
struct _xmlSAXHandler * sax ; /* The SAX handler */
1999-04-05 16:20:10 +04:00
void * userData ; /* the document being built */
xmlDocPtr myDoc ; /* the document being built */
1999-09-22 13:46:25 +04:00
int wellFormed ; /* is the document well formed */
1999-06-02 21:44:04 +04:00
int replaceEntities ; /* shall we replace entities ? */
1999-09-24 02:19:22 +04:00
const xmlChar * version ; /* the XML version string */
const xmlChar * encoding ; /* encoding, if any */
1999-08-10 23:04:08 +04:00
int standalone ; /* standalone document */
int html ; /* are we parsing an HTML document */
1998-08-13 07:39:55 +04:00
/* Input stream stack */
xmlParserInputPtr input ; /* Current input stream */
int inputNr ; /* Number of current input streams */
int inputMax ; /* Max number of input streams */
xmlParserInputPtr * inputTab ; /* stack of inputs */
1999-08-10 23:04:08 +04:00
/* Node analysis stack only used for DOM building */
1998-08-13 07:39:55 +04:00
xmlNodePtr node ; /* Current parsed Node */
int nodeNr ; /* Depth of the parsing stack */
int nodeMax ; /* Max depth of the parsing stack */
xmlNodePtr * nodeTab ; /* array of nodes */
int record_info ; /* Whether node info should be kept */
xmlParserNodeInfoSeq node_seq ; /* info about each node parsed */
1999-09-22 13:46:25 +04:00
1999-09-24 02:19:22 +04:00
int errNo ; /* error code */
1999-09-22 13:46:25 +04:00
int hasExternalSubset ; /* reference and external subset */
int hasPErefs ; /* the internal subset has PE refs */
int external ; /* are we parsing an external entity */
int valid ; /* is the document valid */
int validate ; /* shall we try to validate ? */
xmlValidCtxt vctxt ; /* The validity context */
xmlParserInputState instate ; /* current type of input */
int token ; /* next char look-ahead */
char * directory ; /* the data directory */
1999-10-08 18:37:09 +04:00
/* Node name stack only used for HTML parsing */
xmlChar * name ; /* Current parsed Node */
int nameNr ; /* Depth of the parsing stack */
int nameMax ; /* Max depth of the parsing stack */
xmlChar * * nameTab ; /* array of nodes */
1999-12-12 16:03:50 +03:00
long nbChars ; /* number of xmlChar processed */
1999-12-28 19:35:14 +03:00
long checkIndex ; /* used by progressive parsing lookup */
2000-03-02 06:33:32 +03:00
int keepBlanks ; /* ugly but ... */
2000-01-05 17:46:17 +03:00
} ;
1998-08-13 07:39:55 +04:00
1999-08-10 23:04:08 +04:00
/**
1998-08-13 07:39:55 +04:00
* a SAX Locator .
*/
2000-01-05 17:46:17 +03:00
typedef struct _xmlSAXLocator xmlSAXLocator ;
typedef xmlSAXLocator * xmlSAXLocatorPtr ;
struct _xmlSAXLocator {
1999-09-24 02:19:22 +04:00
const xmlChar * ( * getPublicId ) ( void * ctx ) ;
const xmlChar * ( * getSystemId ) ( void * ctx ) ;
1999-05-29 15:51:49 +04:00
int ( * getLineNumber ) ( void * ctx ) ;
int ( * getColumnNumber ) ( void * ctx ) ;
2000-01-05 17:46:17 +03:00
} ;
1998-08-13 07:39:55 +04:00
1999-08-10 23:04:08 +04:00
/**
* a SAX handler is bunch of callbacks called by the parser when processing
* of the input generate data or structure informations .
1998-08-13 07:39:55 +04:00
*/
1999-05-29 15:51:49 +04:00
typedef xmlParserInputPtr ( * resolveEntitySAXFunc ) ( void * ctx ,
1999-09-24 02:19:22 +04:00
const xmlChar * publicId , const xmlChar * systemId ) ;
typedef void ( * internalSubsetSAXFunc ) ( void * ctx , const xmlChar * name ,
const xmlChar * ExternalID , const xmlChar * SystemID ) ;
1999-05-29 15:51:49 +04:00
typedef xmlEntityPtr ( * getEntitySAXFunc ) ( void * ctx ,
1999-09-24 02:19:22 +04:00
const xmlChar * name ) ;
1999-08-10 23:04:08 +04:00
typedef xmlEntityPtr ( * getParameterEntitySAXFunc ) ( void * ctx ,
1999-09-24 02:19:22 +04:00
const xmlChar * name ) ;
1999-05-29 15:51:49 +04:00
typedef void ( * entityDeclSAXFunc ) ( void * ctx ,
1999-09-24 02:19:22 +04:00
const xmlChar * name , int type , const xmlChar * publicId ,
const xmlChar * systemId , xmlChar * content ) ;
typedef void ( * notationDeclSAXFunc ) ( void * ctx , const xmlChar * name ,
const xmlChar * publicId , const xmlChar * systemId ) ;
typedef void ( * attributeDeclSAXFunc ) ( void * ctx , const xmlChar * elem ,
const xmlChar * name , int type , int def ,
const xmlChar * defaultValue , xmlEnumerationPtr tree ) ;
typedef void ( * elementDeclSAXFunc ) ( void * ctx , const xmlChar * name ,
1999-04-05 16:20:10 +04:00
int type , xmlElementContentPtr content ) ;
1999-05-29 15:51:49 +04:00
typedef void ( * unparsedEntityDeclSAXFunc ) ( void * ctx ,
1999-09-24 02:19:22 +04:00
const xmlChar * name , const xmlChar * publicId ,
const xmlChar * systemId , const xmlChar * notationName ) ;
1999-05-29 15:51:49 +04:00
typedef void ( * setDocumentLocatorSAXFunc ) ( void * ctx ,
1998-08-13 07:39:55 +04:00
xmlSAXLocatorPtr loc ) ;
1999-05-29 15:51:49 +04:00
typedef void ( * startDocumentSAXFunc ) ( void * ctx ) ;
typedef void ( * endDocumentSAXFunc ) ( void * ctx ) ;
1999-09-24 02:19:22 +04:00
typedef void ( * startElementSAXFunc ) ( void * ctx , const xmlChar * name ,
const xmlChar * * atts ) ;
typedef void ( * endElementSAXFunc ) ( void * ctx , const xmlChar * name ) ;
typedef void ( * attributeSAXFunc ) ( void * ctx , const xmlChar * name ,
const xmlChar * value ) ;
typedef void ( * referenceSAXFunc ) ( void * ctx , const xmlChar * name ) ;
typedef void ( * charactersSAXFunc ) ( void * ctx , const xmlChar * ch ,
1999-04-05 16:20:10 +04:00
int len ) ;
1999-05-29 15:51:49 +04:00
typedef void ( * ignorableWhitespaceSAXFunc ) ( void * ctx ,
1999-09-24 02:19:22 +04:00
const xmlChar * ch , int len ) ;
1999-05-29 15:51:49 +04:00
typedef void ( * processingInstructionSAXFunc ) ( void * ctx ,
1999-09-24 02:19:22 +04:00
const xmlChar * target , const xmlChar * data ) ;
typedef void ( * commentSAXFunc ) ( void * ctx , const xmlChar * value ) ;
typedef void ( * cdataBlockSAXFunc ) ( void * ctx , const xmlChar * value , int len ) ;
1999-05-29 15:51:49 +04:00
/*
 * Message callbacks: variadic, taking an opaque ctx pointer and a message
 * string followed by further arguments ("..." must be a single token).
 */
typedef void (*warningSAXFunc)(void *ctx, const char *msg, ...);
typedef void (*errorSAXFunc)(void *ctx, const char *msg, ...);
typedef void (*fatalErrorSAXFunc)(void *ctx, const char *msg, ...);

/* Query callbacks: take the opaque ctx pointer and return an int. */
typedef int (*isStandaloneSAXFunc)(void *ctx);
typedef int (*hasInternalSubsetSAXFunc)(void *ctx);
typedef int (*hasExternalSubsetSAXFunc)(void *ctx);
1998-08-13 07:39:55 +04:00
2000-01-05 17:46:17 +03:00
typedef struct _xmlSAXHandler xmlSAXHandler ;
typedef xmlSAXHandler * xmlSAXHandlerPtr ;
struct _xmlSAXHandler {
1999-04-05 16:20:10 +04:00
internalSubsetSAXFunc internalSubset ;
isStandaloneSAXFunc isStandalone ;
hasInternalSubsetSAXFunc hasInternalSubset ;
hasExternalSubsetSAXFunc hasExternalSubset ;
1998-08-13 07:39:55 +04:00
resolveEntitySAXFunc resolveEntity ;
1999-04-05 16:20:10 +04:00
getEntitySAXFunc getEntity ;
entityDeclSAXFunc entityDecl ;
1998-08-13 07:39:55 +04:00
notationDeclSAXFunc notationDecl ;
1999-04-05 16:20:10 +04:00
attributeDeclSAXFunc attributeDecl ;
elementDeclSAXFunc elementDecl ;
1998-08-13 07:39:55 +04:00
unparsedEntityDeclSAXFunc unparsedEntityDecl ;
setDocumentLocatorSAXFunc setDocumentLocator ;
startDocumentSAXFunc startDocument ;
endDocumentSAXFunc endDocument ;
startElementSAXFunc startElement ;
endElementSAXFunc endElement ;
1999-04-05 16:20:10 +04:00
referenceSAXFunc reference ;
1998-08-13 07:39:55 +04:00
charactersSAXFunc characters ;
ignorableWhitespaceSAXFunc ignorableWhitespace ;
processingInstructionSAXFunc processingInstruction ;
1999-04-05 16:20:10 +04:00
commentSAXFunc comment ;
1998-08-13 07:39:55 +04:00
warningSAXFunc warning ;
errorSAXFunc error ;
fatalErrorSAXFunc fatalError ;
1999-08-10 23:04:08 +04:00
getParameterEntitySAXFunc getParameterEntity ;
cdataBlockSAXFunc cdataBlock ;
2000-01-05 17:46:17 +03:00
} ;
1998-08-13 07:39:55 +04:00
2000-01-03 14:08:02 +03:00
/**
 * External entity loader type: a function taking a URL string, an ID
 * string and a parser context, and returning a parser input.
 */
typedef xmlParserInputPtr
        (*xmlExternalEntityLoader)(const char *URL,
                                   const char *ID,
                                   xmlParserCtxtPtr context);
1999-08-10 23:04:08 +04:00
/**
1999-09-03 02:04:43 +04:00
* Global variables : just the default SAX interface tables and XML
* version infos .
1998-08-13 07:39:55 +04:00
*/
1999-06-23 01:49:07 +04:00
extern const char * xmlParserVersion ;
1998-09-23 04:49:46 +04:00
extern xmlSAXLocator xmlDefaultSAXLocator ;
extern xmlSAXHandler xmlDefaultSAXHandler ;
1999-07-05 20:50:46 +04:00
extern xmlSAXHandler htmlDefaultSAXHandler ;
1998-08-13 07:39:55 +04:00
1999-09-03 02:04:43 +04:00
/**
 * Entity substitution default behaviour.
 * Declaration only; the variable is defined outside this header.
 */
extern int xmlSubstituteEntitiesDefaultValue;
1998-10-27 09:21:04 +03:00
1999-08-10 23:04:08 +04:00
/**
 * Cleanup.
 * Takes no argument and returns nothing; the implementation is not part
 * of this header.
 */
void xmlCleanupParser(void);
1999-07-27 23:52:06 +04:00
1999-11-24 21:04:22 +03:00
/**
 * Input functions.
 * Both take a parser input and an int length and return an int.
 * NOTE(review): the meaning of the return value is not visible from this
 * header -- check the implementation before relying on it.
 */
int xmlParserInputRead(xmlParserInputPtr in, int len);
int xmlParserInputGrow(xmlParserInputPtr in, int len);
1999-07-27 23:52:06 +04:00
1999-08-10 23:04:08 +04:00
/**
 * xmlChar handling: string helpers operating on xmlChar strings.
 * NOTE(review): ownership of the returned xmlChar pointers is not visible
 * from this header -- confirm in the implementation.
 */
xmlChar *xmlStrdup(const xmlChar *cur);
xmlChar *xmlStrndup(const xmlChar *cur, int len);
xmlChar *xmlStrsub(const xmlChar *str, int start, int len);
const xmlChar *xmlStrchr(const xmlChar *str, xmlChar val);
/*
 * NOTE(review): val is not const-qualified, unlike the other read-only
 * string parameters here; left unchanged because the definition lives
 * outside this header and must match.
 */
const xmlChar *xmlStrstr(const xmlChar *str, xmlChar *val);
int xmlStrcmp(const xmlChar *str1, const xmlChar *str2);
int xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len);
int xmlStrlen(const xmlChar *str);
xmlChar *xmlStrcat(xmlChar *cur, const xmlChar *add);
xmlChar *xmlStrncat(xmlChar *cur, const xmlChar *add, int len);
1998-08-13 07:39:55 +04:00
1999-08-10 23:04:08 +04:00
/**
* Basic parsing Interfaces
1999-01-17 22:11:59 +03:00
*/
1999-09-24 02:19:22 +04:00
xmlDocPtr xmlParseDoc ( xmlChar * cur ) ;
1999-08-30 01:02:19 +04:00
xmlDocPtr xmlParseMemory ( char * buffer ,
int size ) ;
xmlDocPtr xmlParseFile ( const char * filename ) ;
int xmlSubstituteEntitiesDefault ( int val ) ;
2000-03-04 14:39:43 +03:00
int xmlKeepBlanksDefault ( int val ) ;
1999-01-17 22:11:59 +03:00
1999-08-10 23:04:08 +04:00
/**
1999-01-17 22:11:59 +03:00
* Recovery mode
*/
1999-09-24 02:19:22 +04:00
xmlDocPtr xmlRecoverDoc ( xmlChar * cur ) ;
1999-08-30 01:02:19 +04:00
xmlDocPtr xmlRecoverMemory ( char * buffer ,
int size ) ;
xmlDocPtr xmlRecoverFile ( const char * filename ) ;
1999-01-17 22:11:59 +03:00
1999-08-10 23:04:08 +04:00
/**
* Less common routines and SAX interfaces
1999-01-17 22:11:59 +03:00
*/
1999-08-30 01:02:19 +04:00
int xmlParseDocument ( xmlParserCtxtPtr ctxt ) ;
xmlDocPtr xmlSAXParseDoc ( xmlSAXHandlerPtr sax ,
1999-09-24 02:19:22 +04:00
xmlChar * cur ,
1999-08-30 01:02:19 +04:00
int recovery ) ;
1999-09-26 15:31:02 +04:00
int xmlSAXUserParseFile ( xmlSAXHandlerPtr sax ,
void * user_data ,
const char * filename ) ;
int xmlSAXUserParseMemory ( xmlSAXHandlerPtr sax ,
void * user_data ,
char * buffer ,
int size ) ;
1999-08-30 01:02:19 +04:00
xmlDocPtr xmlSAXParseMemory ( xmlSAXHandlerPtr sax ,
char * buffer ,
int size ,
int recovery ) ;
xmlDocPtr xmlSAXParseFile ( xmlSAXHandlerPtr sax ,
const char * filename ,
int recovery ) ;
1999-09-24 02:19:22 +04:00
xmlDtdPtr xmlParseDTD ( const xmlChar * ExternalID ,
const xmlChar * SystemID ) ;
1999-08-30 01:02:19 +04:00
xmlDtdPtr xmlSAXParseDTD ( xmlSAXHandlerPtr sax ,
1999-09-24 02:19:22 +04:00
const xmlChar * ExternalID ,
const xmlChar * SystemID ) ;
1999-12-28 19:35:14 +03:00
/**
 * SAX initialization routines.
 * Both take no argument and return nothing.
 */
void xmlDefaultSAXHandlerInit(void);
void htmlDefaultSAXHandlerInit(void);
/**
* Parser contexts handling .
*/
1999-08-30 01:02:19 +04:00
void xmlInitParserCtxt ( xmlParserCtxtPtr ctxt ) ;
void xmlClearParserCtxt ( xmlParserCtxtPtr ctxt ) ;
1999-12-28 19:35:14 +03:00
void xmlFreeParserCtxt ( xmlParserCtxtPtr ctxt ) ;
1999-08-30 01:02:19 +04:00
void xmlSetupParserForBuffer ( xmlParserCtxtPtr ctxt ,
1999-09-24 02:19:22 +04:00
const xmlChar * buffer ,
1999-08-30 01:02:19 +04:00
const char * filename ) ;
1999-12-28 19:35:14 +03:00
xmlParserCtxtPtr xmlCreateDocParserCtxt ( xmlChar * cur ) ;
/**
 * Interfaces for the push mode.
 * xmlCreatePushParserCtxt returns a parser context; xmlParseChunk takes
 * that context plus a (chunk, size) pair and a terminate flag.
 */
xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
                        const char *chunk, int size,
                        const char *filename);
int
xmlParseChunk(xmlParserCtxtPtr ctxt,
              const char *chunk, int size,
              int terminate);
1999-08-30 01:02:19 +04:00
/**
* Node infos
*/
const xmlParserNodeInfo *
xmlParserFindNodeInfo ( const xmlParserCtxt * ctxt ,
1999-02-22 13:33:01 +03:00
const xmlNode * node ) ;
1999-08-30 01:02:19 +04:00
void xmlInitNodeInfoSeq ( xmlParserNodeInfoSeqPtr seq ) ;
void xmlClearNodeInfoSeq ( xmlParserNodeInfoSeqPtr seq ) ;
1998-08-13 07:39:55 +04:00
unsigned long xmlParserFindNodeInfoIndex ( const xmlParserNodeInfoSeq * seq ,
const xmlNode * node ) ;
1999-08-30 01:02:19 +04:00
void xmlParserAddNodeInfo ( xmlParserCtxtPtr ctxt ,
const xmlParserNodeInfo * info ) ;
/*
* External entities handling actually implemented in xmlIO
*/
1998-08-13 07:39:55 +04:00
1999-08-30 01:02:19 +04:00
void xmlSetExternalEntityLoader ( xmlExternalEntityLoader f ) ;
xmlExternalEntityLoader
xmlGetExternalEntityLoader ( void ) ;
xmlParserInputPtr
xmlLoadExternalEntity ( const char * URL ,
const char * ID ,
2000-01-03 14:08:02 +03:00
xmlParserCtxtPtr context ) ;
1999-12-12 16:03:50 +03:00
1998-08-13 07:39:55 +04:00
/* Close the C-linkage block opened near the top of this header (C++ only). */
# ifdef __cplusplus
}
# endif
/* End of the include guard. */
# endif /* __XML_PARSER_H__ */