parser

parser —

Synopsis




#define     XML_DEFAULT_VERSION
void        (*xmlParserInputDeallocate)     (xmlChar *str);
struct      xmlParserInput;
struct      xmlParserNodeInfo;
typedef     xmlParserNodeInfoPtr;
struct      xmlParserNodeInfoSeq;
typedef     xmlParserNodeInfoSeqPtr;
enum        xmlParserInputState;
#define     XML_DETECT_IDS
#define     XML_COMPLETE_ATTRS
#define     XML_SKIP_IDS
struct      xmlParserCtxt;
struct      xmlSAXLocator;
xmlParserInputPtr (*resolveEntitySAXFunc)   (void *ctx,
                                             const xmlChar *publicId,
                                             const xmlChar *systemId);
void        (*internalSubsetSAXFunc)        (void *ctx,
                                             const xmlChar *name,
                                             const xmlChar *ExternalID,
                                             const xmlChar *SystemID);
void        (*externalSubsetSAXFunc)        (void *ctx,
                                             const xmlChar *name,
                                             const xmlChar *ExternalID,
                                             const xmlChar *SystemID);
xmlEntityPtr (*getEntitySAXFunc)            (void *ctx,
                                             const xmlChar *name);
xmlEntityPtr (*getParameterEntitySAXFunc)   (void *ctx,
                                             const xmlChar *name);
void        (*entityDeclSAXFunc)            (void *ctx,
                                             const xmlChar *name,
                                             int type,
                                             const xmlChar *publicId,
                                             const xmlChar *systemId,
                                             xmlChar *content);
void        (*notationDeclSAXFunc)          (void *ctx,
                                             const xmlChar *name,
                                             const xmlChar *publicId,
                                             const xmlChar *systemId);
void        (*attributeDeclSAXFunc)         (void *ctx,
                                             const xmlChar *elem,
                                             const xmlChar *fullname,
                                             int type,
                                             int def,
                                             const xmlChar *defaultValue,
                                             xmlEnumerationPtr tree);
void        (*elementDeclSAXFunc)           (void *ctx,
                                             const xmlChar *name,
                                             int type,
                                             xmlElementContentPtr content);
void        (*unparsedEntityDeclSAXFunc)    (void *ctx,
                                             const xmlChar *name,
                                             const xmlChar *publicId,
                                             const xmlChar *systemId,
                                             const xmlChar *notationName);
void        (*setDocumentLocatorSAXFunc)    (void *ctx,
                                             xmlSAXLocatorPtr loc);
void        (*startDocumentSAXFunc)         (void *ctx);
void        (*endDocumentSAXFunc)           (void *ctx);
void        (*startElementSAXFunc)          (void *ctx,
                                             const xmlChar *name,
                                             const xmlChar **atts);
void        (*endElementSAXFunc)            (void *ctx,
                                             const xmlChar *name);
void        (*attributeSAXFunc)             (void *ctx,
                                             const xmlChar *name,
                                             const xmlChar *value);
void        (*referenceSAXFunc)             (void *ctx,
                                             const xmlChar *name);
void        (*charactersSAXFunc)            (void *ctx,
                                             const xmlChar *ch,
                                             int len);
void        (*ignorableWhitespaceSAXFunc)   (void *ctx,
                                             const xmlChar *ch,
                                             int len);
void        (*processingInstructionSAXFunc) (void *ctx,
                                             const xmlChar *target,
                                             const xmlChar *data);
void        (*commentSAXFunc)               (void *ctx,
                                             const xmlChar *value);
void        (*cdataBlockSAXFunc)            (void *ctx,
                                             const xmlChar *value,
                                             int len);
void        (*warningSAXFunc)               (void *ctx,
                                             const char *msg,
                                             ...);
void        (*errorSAXFunc)                 (void *ctx,
                                             const char *msg,
                                             ...);
void        (*fatalErrorSAXFunc)            (void *ctx,
                                             const char *msg,
                                             ...);
int         (*isStandaloneSAXFunc)          (void *ctx);
int         (*hasInternalSubsetSAXFunc)     (void *ctx);
int         (*hasExternalSubsetSAXFunc)     (void *ctx);
#define     XML_SAX2_MAGIC
void        (*startElementNsSAX2Func)       (void *ctx,
                                             const xmlChar *localname,
                                             const xmlChar *prefix,
                                             const xmlChar *URI,
                                             int nb_namespaces,
                                             const xmlChar **namespaces,
                                             int nb_attributes,
                                             int nb_defaulted,
                                             const xmlChar **attributes);
void        (*endElementNsSAX2Func)         (void *ctx,
                                             const xmlChar *localname,
                                             const xmlChar *prefix,
                                             const xmlChar *URI);
struct      xmlSAXHandler;
struct      xmlSAXHandlerV1;
typedef     xmlSAXHandlerV1Ptr;
xmlParserInputPtr (*xmlExternalEntityLoader)
                                            (const char *URL,
                                             const char *ID,
                                             xmlParserCtxtPtr context);
void        xmlInitParser                   (void);
void        xmlCleanupParser                (void);
int         xmlParserInputRead              (xmlParserInputPtr in,
                                             int len);
int         xmlParserInputGrow              (xmlParserInputPtr in,
                                             int len);
xmlChar*    xmlStrdup                       (const xmlChar *cur);
xmlChar*    xmlStrndup                      (const xmlChar *cur,
                                             int len);
xmlChar*    xmlCharStrndup                  (const char *cur,
                                             int len);
xmlChar*    xmlCharStrdup                   (const char *cur);
xmlChar*    xmlStrsub                       (const xmlChar *str,
                                             int start,
                                             int len);
const xmlChar* xmlStrchr                    (const xmlChar *str,
                                             xmlChar val);
const xmlChar* xmlStrstr                    (const xmlChar *str,
                                             const xmlChar *val);
const xmlChar* xmlStrcasestr                (const xmlChar *str,
                                             xmlChar *val);
int         xmlStrcmp                       (const xmlChar *str1,
                                             const xmlChar *str2);
int         xmlStrncmp                      (const xmlChar *str1,
                                             const xmlChar *str2,
                                             int len);
int         xmlStrcasecmp                   (const xmlChar *str1,
                                             const xmlChar *str2);
int         xmlStrncasecmp                  (const xmlChar *str1,
                                             const xmlChar *str2,
                                             int len);
int         xmlStrEqual                     (const xmlChar *str1,
                                             const xmlChar *str2);
int         xmlStrQEqual                    (const xmlChar *pref,
                                             const xmlChar *name,
                                             const xmlChar *str);
int         xmlStrlen                       (const xmlChar *str);
xmlChar*    xmlStrcat                       (xmlChar *cur,
                                             const xmlChar *add);
xmlChar*    xmlStrncat                      (xmlChar *cur,
                                             const xmlChar *add,
                                             int len);
int         xmlStrPrintf                    (xmlChar *buf,
                                             int len,
                                             const xmlChar *msg,
                                             ...);
int         xmlStrVPrintf                   (xmlChar *buf,
                                             int len,
                                             const xmlChar *msg,
                                             va_list ap);
xmlDocPtr   xmlParseDoc                     (xmlChar *cur);
xmlDocPtr   xmlParseMemory                  (const char *buffer,
                                             int size);
xmlDocPtr   xmlParseFile                    (const char *filename);
int         xmlSubstituteEntitiesDefault    (int val);
int         xmlKeepBlanksDefault            (int val);
void        xmlStopParser                   (xmlParserCtxtPtr ctxt);
int         xmlPedanticParserDefault        (int val);
int         xmlLineNumbersDefault           (int val);
xmlDocPtr   xmlRecoverDoc                   (xmlChar *cur);
xmlDocPtr   xmlRecoverMemory                (const char *buffer,
                                             int size);
xmlDocPtr   xmlRecoverFile                  (const char *filename);
int         xmlParseDocument                (xmlParserCtxtPtr ctxt);
int         xmlParseExtParsedEnt            (xmlParserCtxtPtr ctxt);
xmlDocPtr   xmlSAXParseDoc                  (xmlSAXHandlerPtr sax,
                                             xmlChar *cur,
                                             int recovery);
int         xmlSAXUserParseFile             (xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             const char *filename);
int         xmlSAXUserParseMemory           (xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             const char *buffer,
                                             int size);
xmlDocPtr   xmlSAXParseMemory               (xmlSAXHandlerPtr sax,
                                             const char *buffer,
                                             int size,
                                             int recovery);
xmlDocPtr   xmlSAXParseMemoryWithData       (xmlSAXHandlerPtr sax,
                                             const char *buffer,
                                             int size,
                                             int recovery,
                                             void *data);
xmlDocPtr   xmlSAXParseFile                 (xmlSAXHandlerPtr sax,
                                             const char *filename,
                                             int recovery);
xmlDocPtr   xmlSAXParseFileWithData         (xmlSAXHandlerPtr sax,
                                             const char *filename,
                                             int recovery,
                                             void *data);
xmlDocPtr   xmlSAXParseEntity               (xmlSAXHandlerPtr sax,
                                             const char *filename);
xmlDocPtr   xmlParseEntity                  (const char *filename);
xmlDtdPtr   xmlParseDTD                     (const xmlChar *ExternalID,
                                             const xmlChar *SystemID);
xmlDtdPtr   xmlSAXParseDTD                  (xmlSAXHandlerPtr sax,
                                             const xmlChar *ExternalID,
                                             const xmlChar *SystemID);
xmlDtdPtr   xmlIOParseDTD                   (xmlSAXHandlerPtr sax,
                                             xmlParserInputBufferPtr input,
                                             xmlCharEncoding enc);
int         xmlParseBalancedChunkMemory     (xmlDocPtr doc,
                                             xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             int depth,
                                             const xmlChar *string,
                                             xmlNodePtr *lst);
int         xmlParseBalancedChunkMemoryRecover
                                            (xmlDocPtr doc,
                                             xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             int depth,
                                             const xmlChar *string,
                                             xmlNodePtr *lst,
                                             int recover);
int         xmlParseExternalEntity          (xmlDocPtr doc,
                                             xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             int depth,
                                             const xmlChar *URL,
                                             const xmlChar *ID,
                                             xmlNodePtr *lst);
int         xmlParseCtxtExternalEntity      (xmlParserCtxtPtr ctx,
                                             const xmlChar *URL,
                                             const xmlChar *ID,
                                             xmlNodePtr *lst);
xmlParserCtxtPtr xmlNewParserCtxt           (void);
int         xmlInitParserCtxt               (xmlParserCtxtPtr ctxt);
void        xmlClearParserCtxt              (xmlParserCtxtPtr ctxt);
void        xmlFreeParserCtxt               (xmlParserCtxtPtr ctxt);
void        xmlSetupParserForBuffer         (xmlParserCtxtPtr ctxt,
                                             const xmlChar *buffer,
                                             const char *filename);
xmlParserCtxtPtr xmlCreateDocParserCtxt     (const xmlChar *cur);
int         xmlGetFeaturesList              (int *len,
                                             const char **result);
int         xmlGetFeature                   (xmlParserCtxtPtr ctxt,
                                             const char *name,
                                             void *result);
int         xmlSetFeature                   (xmlParserCtxtPtr ctxt,
                                             const char *name,
                                             void *value);
xmlParserCtxtPtr xmlCreatePushParserCtxt    (xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             const char *chunk,
                                             int size,
                                             const char *filename);
int         xmlParseChunk                   (xmlParserCtxtPtr ctxt,
                                             const char *chunk,
                                             int size,
                                             int terminate);
xmlParserCtxtPtr xmlCreateIOParserCtxt      (xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             xmlInputReadCallback ioread,
                                             xmlInputCloseCallback ioclose,
                                             void *ioctx,
                                             xmlCharEncoding enc);
xmlParserInputPtr xmlNewIOInputStream       (xmlParserCtxtPtr ctxt,
                                             xmlParserInputBufferPtr input,
                                             xmlCharEncoding enc);
const xmlParserNodeInfo* xmlParserFindNodeInfo
                                            (const xmlParserCtxtPtr ctxt,
                                             const xmlNodePtr node);
void        xmlInitNodeInfoSeq              (xmlParserNodeInfoSeqPtr seq);
void        xmlClearNodeInfoSeq             (xmlParserNodeInfoSeqPtr seq);
void        xmlParserAddNodeInfo            (xmlParserCtxtPtr ctxt,
                                             const xmlParserNodeInfoPtr info);
void        xmlSetExternalEntityLoader      (xmlExternalEntityLoader f);
xmlExternalEntityLoader xmlGetExternalEntityLoader
                                            (void);
xmlParserInputPtr xmlLoadExternalEntity     (const char *URL,
                                             const char *ID,
                                             xmlParserCtxtPtr ctxt);
enum        xmlParserOption;
void        xmlCtxtReset                    (xmlParserCtxtPtr ctxt);
int         xmlCtxtResetPush                (xmlParserCtxtPtr ctxt,
                                             const char *chunk,
                                             int size,
                                             const char *filename,
                                             const char *encoding);
int         xmlCtxtUseOptions               (xmlParserCtxtPtr ctxt,
                                             int options);
xmlDocPtr   xmlReadDoc                      (const xmlChar *cur,
                                             const char *URL,
                                             const char *encoding,
                                             int options);
xmlDocPtr   xmlReadFile                     (const char *URL,
                                             const char *encoding,
                                             int options);
xmlDocPtr   xmlReadMemory                   (const char *buffer,
                                             int size,
                                             const char *URL,
                                             const char *encoding,
                                             int options);
xmlDocPtr   xmlReadFd                       (int fd,
                                             const char *URL,
                                             const char *encoding,
                                             int options);
xmlDocPtr   xmlReadIO                       (xmlInputReadCallback ioread,
                                             xmlInputCloseCallback ioclose,
                                             void *ioctx,
                                             const char *URL,
                                             const char *encoding,
                                             int options);
xmlDocPtr   xmlCtxtReadDoc                  (xmlParserCtxtPtr ctxt,
                                             const xmlChar *cur,
                                             const char *URL,
                                             const char *encoding,
                                             int options);
xmlDocPtr   xmlCtxtReadFile                 (xmlParserCtxtPtr ctxt,
                                             const char *filename,
                                             const char *encoding,
                                             int options);
xmlDocPtr   xmlCtxtReadMemory               (xmlParserCtxtPtr ctxt,
                                             const char *buffer,
                                             int size,
                                             const char *URL,
                                             const char *encoding,
                                             int options);
xmlDocPtr   xmlCtxtReadFd                   (xmlParserCtxtPtr ctxt,
                                             int fd,
                                             const char *URL,
                                             const char *encoding,
                                             int options);
xmlDocPtr   xmlCtxtReadIO                   (xmlParserCtxtPtr ctxt,
                                             xmlInputReadCallback ioread,
                                             xmlInputCloseCallback ioclose,
                                             void *ioctx,
                                             const char *URL,
                                             const char *encoding,
                                             int options);

Description

Details

XML_DEFAULT_VERSION

#define XML_DEFAULT_VERSION	"1.0"

The default version of XML used: 1.0


xmlParserInputDeallocate ()

void        (*xmlParserInputDeallocate)     (xmlChar *str);

Callback for freeing some parser input allocations.

str: the string to deallocate

struct xmlParserInput

/*
 * An input flow for the XML processor: tracks the byte range being parsed
 * (base/cur/end), the current position (line/col), and the entity's
 * declared encoding, version and standalone status.
 */
struct xmlParserInput {

    /* Input buffer */
    xmlParserInputBufferPtr buf;      /* UTF-8 encoded buffer */

    const char *filename;             /* The file analyzed, if any */
    const char *directory;            /* The directory/base of the file */
    const xmlChar *base;              /* Base of the array to parse */
    const xmlChar *cur;               /* Current char being parsed */
    const xmlChar *end;               /* End of the array to parse */
    int length;                       /* Length, if known */
    int line;                         /* Current line */
    int col;                          /* Current column */
    /*
     * NOTE: consumed is only tested for equality in the parser code,
     *       so even if it overflows this should not cause trouble
     *       when parsing very large instances.
     */
    unsigned long consumed;           /* How many xmlChars already consumed */
    xmlParserInputDeallocate free;    /* Function to deallocate the base */
    const xmlChar *encoding;          /* The encoding string for the entity */
    const xmlChar *version;           /* The version string for the entity */
    int standalone;                   /* Was that entity marked standalone */
    int id;                           /* A unique identifier for the entity */
};

An xmlParserInput is an input flow for the XML processor. Each parsed entity is associated with an xmlParserInput (except the few predefined ones). This is the case both for internal entities - in which case the flow is already completely in memory - and for external entities - in which case we use the buf structure for progressive reading and I18N conversions to the internal UTF-8 format.


struct xmlParserNodeInfo

/*
 * Records where in the input a given node was found.
 */
struct xmlParserNodeInfo {

  const struct _xmlNode* node;  /* the node this record describes */
  /*
   * Position and line number at which the text that created the node
   * begins and ends.
   */
  unsigned long begin_pos;
  unsigned long begin_line;
  unsigned long end_pos;
  unsigned long end_line;
};

The parser can be asked to collect node information, i.e. where in the file each node was detected. NOTE: This is off by default and not very well tested.


xmlParserNodeInfoPtr

typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;


struct xmlParserNodeInfoSeq

/*
 * A sequence of xmlParserNodeInfo records held in a single buffer.
 */
struct xmlParserNodeInfoSeq {

  unsigned long maximum;      /* NOTE(review): presumably the allocated capacity of buffer - confirm */
  unsigned long length;       /* NOTE(review): presumably the number of records in use - confirm */
  xmlParserNodeInfo* buffer;  /* the node info records */
};


xmlParserNodeInfoSeqPtr

typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;


enum xmlParserInputState

/*
 * States of the parser's input processing. XML_PARSER_EOF is -1,
 * XML_PARSER_START is 0, and the remaining states count up from there.
 */
typedef enum {
    XML_PARSER_EOF = -1,	/* nothing is to be parsed */
    XML_PARSER_START = 0,	/* nothing has been parsed */
    XML_PARSER_MISC,		/* Misc* before the internal subset */
    XML_PARSER_PI,		/* within a processing instruction */
    XML_PARSER_DTD,		/* within some DTD content */
    XML_PARSER_PROLOG,		/* Misc* after the internal subset */
    XML_PARSER_COMMENT,		/* within a comment */
    XML_PARSER_START_TAG,	/* within a start tag */
    XML_PARSER_CONTENT,		/* within the content */
    XML_PARSER_CDATA_SECTION,	/* within a CDATA section */
    XML_PARSER_END_TAG,		/* within a closing tag */
    XML_PARSER_ENTITY_DECL,	/* within an entity declaration */
    XML_PARSER_ENTITY_VALUE,	/* within an entity value in a declaration */
    XML_PARSER_ATTRIBUTE_VALUE,	/* within an attribute value */
    XML_PARSER_SYSTEM_LITERAL,	/* within a SYSTEM value */
    XML_PARSER_EPILOG, 		/* the Misc* after the last end tag */
    XML_PARSER_IGNORE,		/* within an IGNORED section */
    XML_PARSER_PUBLIC_LITERAL 	/* within a PUBLIC value */
} xmlParserInputState;

The parser now also works as a state-based parser. The recursive one uses the state info for entity processing.


XML_DETECT_IDS

#define XML_DETECT_IDS		2

Bit in the loadsubset context field requesting ID/REF lookups. Use it to initialize xmlLoadExtDtdDefaultValue.


XML_COMPLETE_ATTRS

#define XML_COMPLETE_ATTRS	4

Bit in the loadsubset context field requesting that element attribute lists be completed with the attributes defaulted from the DTDs. Use it to initialize xmlLoadExtDtdDefaultValue.


XML_SKIP_IDS

#define XML_SKIP_IDS		8

Bit in the loadsubset context field requesting that ID/REF registration be skipped. Used to initialize xmlLoadExtDtdDefaultValue in some special cases.


struct xmlParserCtxt

/*
 * The parser context: the SAX handler and user data, the document being
 * built, the input/node/name/space stacks, validation and option flags,
 * SAX2 namespace bookkeeping, and the last error reported.
 */
struct xmlParserCtxt {

    struct _xmlSAXHandler *sax;       /* The SAX handler */
    void            *userData;        /* For SAX interface only, used by DOM build */
    xmlDocPtr           myDoc;        /* the document being built */
    int            wellFormed;        /* is the document well formed */
    int       replaceEntities;        /* shall we replace entities ? */
    const xmlChar    *version;        /* the XML version string */
    const xmlChar   *encoding;        /* the declared encoding, if any */
    int            standalone;        /* standalone document */
    int                  html;        /* an HTML(1)/Docbook(2) document */

    /* Input stream stack */
    xmlParserInputPtr  input;         /* Current input stream */
    int                inputNr;       /* Number of current input streams */
    int                inputMax;      /* Max number of input streams */
    xmlParserInputPtr *inputTab;      /* stack of inputs */

    /* Node analysis stack, only used for DOM building */
    xmlNodePtr         node;          /* Current parsed Node */
    int                nodeNr;        /* Depth of the parsing stack */
    int                nodeMax;       /* Max depth of the parsing stack */
    xmlNodePtr        *nodeTab;       /* array of nodes */

    int record_info;                  /* Whether node info should be kept */
    xmlParserNodeInfoSeq node_seq;    /* info about each node parsed */

    int errNo;                        /* error code */

    int     hasExternalSubset;        /* reference and external subset */
    int             hasPErefs;        /* the internal subset has PE refs */
    int              external;        /* are we parsing an external entity */

    int                 valid;        /* is the document valid */
    int              validate;        /* shall we try to validate ? */
    xmlValidCtxt        vctxt;        /* The validity context */

    xmlParserInputState instate;      /* current type of input */
    int                 token;        /* next char look-ahead */

    char           *directory;        /* the data directory */

    /* Node name stack */
    const xmlChar     *name;          /* Name of the current parsed Node */
    int                nameNr;        /* Depth of the parsing stack */
    int                nameMax;       /* Max depth of the parsing stack */
    const xmlChar *   *nameTab;       /* array of node names */

    long               nbChars;       /* number of xmlChar processed */
    long            checkIndex;       /* used by progressive parsing lookup */
    int             keepBlanks;       /* ugly but ... */
    int             disableSAX;       /* SAX callbacks are disabled */
    int               inSubset;       /* Parsing is in int 1/ext 2 subset */
    const xmlChar *    intSubName;    /* name of subset */
    xmlChar *          extSubURI;     /* URI of external subset */
    xmlChar *          extSubSystem;  /* SYSTEM ID of external subset */

    /* xml:space values */
    int *              space;         /* Should the parser preserve spaces */
    int                spaceNr;       /* Depth of the parsing stack */
    int                spaceMax;      /* Max depth of the parsing stack */
    int *              spaceTab;      /* array of space infos */

    int                depth;         /* to prevent entity substitution loops */
    xmlParserInputPtr  entity;        /* used to check entities boundaries */
    int                charset;       /* encoding of the in-memory content,
                                         actually an xmlCharEncoding */
    int                nodelen;       /* Those two fields are there to */
    int                nodemem;       /* speed up large node parsing */
    int                pedantic;      /* signal pedantic warnings */
    void              *_private;      /* For user data, libxml won't touch it */

    int                loadsubset;    /* should the external subset be loaded */
    int                linenumbers;   /* set line number in element content */
    void              *catalogs;       /* document's own catalog */
    int                recovery;      /* run in recovery mode */
    int                progressive;   /* is this a progressive parsing */
    xmlDictPtr         dict;          /* dictionary for the parser */
    const xmlChar *   *atts;          /* array for the attributes callbacks */
    int                maxatts;       /* the size of the array */
    int                docdict;       /* use strings from dict to build tree */

    /*
     * pre-interned strings
     */
    const xmlChar *str_xml;
    const xmlChar *str_xmlns;
    const xmlChar *str_xml_ns;

    /*
     * Everything below is used only by the new SAX mode
     */
    int                sax2;          /* operating in the new SAX mode */
    int                nsNr;          /* the number of inherited namespaces */
    int                nsMax;         /* the size of the arrays */
    const xmlChar *   *nsTab;         /* the array of prefix/namespace name */
    int               *attallocs;     /* which attributes were allocated */
    void *            *pushTab;       /* array of data for push */
    xmlHashTablePtr    attsDefault;   /* defaulted attributes if any */
    xmlHashTablePtr    attsSpecial;   /* non-CDATA attributes if any */
    int                nsWellFormed;  /* is the document XML Namespace okay */
    int                options;       /* Extra options */

    /*
     * Those fields are needed only for streaming parsing so far
     */
    int               dictNames;    /* Use dictionary names for the tree */
    int               freeElemsNr;  /* number of freed element nodes */
    xmlNodePtr        freeElems;    /* List of freed element nodes */
    int               freeAttrsNr;  /* number of freed attributes nodes */
    xmlAttrPtr        freeAttrs;    /* List of freed attributes nodes */

    /*
     * the complete error information for the last error.
     */
    xmlError          lastError;
};

The parser context. NOTE: This doesn't completely define the parser state; the (current?) design of the parser uses recursive function calls, since this allows an easy mapping from the production rules of the specification to the actual code. The drawback is that the actual function calls also reflect the parser state. However, most of the parsing routines take the parser context pointer as their only argument, so migrating to a state-based parser for progressive parsing shouldn't be too hard.


struct xmlSAXLocator

/*
 * A SAX Locator: a table of four function pointers, each called with an
 * opaque context pointer.
 * NOTE(review): the member names suggest they report the public ID,
 * system ID, line number and column number of the current parse
 * position -- confirm against the implementation.
 */
struct xmlSAXLocator {

    const xmlChar *(*getPublicId)(void *ctx);
    const xmlChar *(*getSystemId)(void *ctx);
    int (*getLineNumber)(void *ctx);
    int (*getColumnNumber)(void *ctx);
};

A SAX Locator.


resolveEntitySAXFunc ()

xmlParserInputPtr (*resolveEntitySAXFunc)   (void *ctx,
                                             const xmlChar *publicId,
                                             const xmlChar *systemId);

Callback: The entity loader. To control the loading of external entities, the application can either: - override this resolveEntity() callback in the SAX block - or, better, use the xmlSetExternalEntityLoader() function to set up its own entity resolution routine.

ctx: the user data (XML parser context)
publicId: The public ID of the entity
systemId: The system ID of the entity
Returns :the xmlParserInputPtr if inlined or NULL for DOM behaviour.

internalSubsetSAXFunc ()

void        (*internalSubsetSAXFunc)        (void *ctx,
                                             const xmlChar *name,
                                             const xmlChar *ExternalID,
                                             const xmlChar *SystemID);

Callback on internal subset declaration.

ctx: the user data (XML parser context)
name: the root element name
ExternalID: the external ID
SystemID: the SYSTEM ID (e.g. filename or URL)

externalSubsetSAXFunc ()

void        (*externalSubsetSAXFunc)        (void *ctx,
                                             const xmlChar *name,
                                             const xmlChar *ExternalID,
                                             const xmlChar *SystemID);

Callback on external subset declaration.

ctx: the user data (XML parser context)
name: the root element name
ExternalID: the external ID
SystemID: the SYSTEM ID (e.g. filename or URL)

getEntitySAXFunc ()

xmlEntityPtr (*getEntitySAXFunc)            (void *ctx,
                                             const xmlChar *name);

Get an entity by name.

ctx: the user data (XML parser context)
name: The entity name
Returns :the xmlEntityPtr if found.

getParameterEntitySAXFunc ()

xmlEntityPtr (*getParameterEntitySAXFunc)   (void *ctx,
                                             const xmlChar *name);

Get a parameter entity by name.

ctx: the user data (XML parser context)
name: The entity name
Returns :the xmlEntityPtr if found.

entityDeclSAXFunc ()

void        (*entityDeclSAXFunc)            (void *ctx,
                                             const xmlChar *name,
                                             int type,
                                             const xmlChar *publicId,
                                             const xmlChar *systemId,
                                             xmlChar *content);

An entity definition has been parsed.

ctx: the user data (XML parser context)
name: the entity name
type: the entity type
publicId: The public ID of the entity
systemId: The system ID of the entity
content: the entity value (without processing).

notationDeclSAXFunc ()

void        (*notationDeclSAXFunc)          (void *ctx,
                                             const xmlChar *name,
                                             const xmlChar *publicId,
                                             const xmlChar *systemId);

What to do when a notation declaration has been parsed.

ctx: the user data (XML parser context)
name: The name of the notation
publicId: The public ID of the entity
systemId: The system ID of the entity

attributeDeclSAXFunc ()

void        (*attributeDeclSAXFunc)         (void *ctx,
                                             const xmlChar *elem,
                                             const xmlChar *fullname,
                                             int type,
                                             int def,
                                             const xmlChar *defaultValue,
                                             xmlEnumerationPtr tree);

An attribute definition has been parsed.

ctx: the user data (XML parser context)
elem: the name of the element
fullname: the attribute name
type: the attribute type
def: the type of default value
defaultValue: the attribute default value
tree: the tree of enumerated value set

elementDeclSAXFunc ()

void        (*elementDeclSAXFunc)           (void *ctx,
                                             const xmlChar *name,
                                             int type,
                                             xmlElementContentPtr content);

An element definition has been parsed.

ctx: the user data (XML parser context)
name: the element name
type: the element type
content: the element value tree

unparsedEntityDeclSAXFunc ()

void        (*unparsedEntityDeclSAXFunc)    (void *ctx,
                                             const xmlChar *name,
                                             const xmlChar *publicId,
                                             const xmlChar *systemId,
                                             const xmlChar *notationName);

What to do when an unparsed entity declaration is parsed.

ctx: the user data (XML parser context)
name: The name of the entity
publicId: The public ID of the entity
systemId: The system ID of the entity
notationName: the name of the notation

setDocumentLocatorSAXFunc ()

void        (*setDocumentLocatorSAXFunc)    (void *ctx,
                                             xmlSAXLocatorPtr loc);

Receive the document locator at startup, actually xmlDefaultSAXLocator. Everything is available on the context, so this is useless in our case.

ctx: the user data (XML parser context)
loc: A SAX Locator

startDocumentSAXFunc ()

void        (*startDocumentSAXFunc)         (void *ctx);

Called when the document starts being processed.

ctx: the user data (XML parser context)

endDocumentSAXFunc ()

void        (*endDocumentSAXFunc)           (void *ctx);

Called when the document end has been detected.

ctx: the user data (XML parser context)

startElementSAXFunc ()

void        (*startElementSAXFunc)          (void *ctx,
                                             const xmlChar *name,
                                             const xmlChar **atts);

Called when an opening tag has been processed.

ctx: the user data (XML parser context)
name: The element name, including namespace prefix
atts: An array of name/value attributes pairs, NULL terminated

endElementSAXFunc ()

void        (*endElementSAXFunc)            (void *ctx,
                                             const xmlChar *name);

Called when the end of an element has been detected.

ctx: the user data (XML parser context)
name: The element name

attributeSAXFunc ()

void        (*attributeSAXFunc)             (void *ctx,
                                             const xmlChar *name,
                                             const xmlChar *value);

Handle an attribute that has been read by the parser. The default handling is to convert the attribute into a DOM subtree and paste it into a new xmlAttr element added to the element.

ctx: the user data (XML parser context)
name: The attribute name, including namespace prefix
value: The attribute value

referenceSAXFunc ()

void        (*referenceSAXFunc)             (void *ctx,
                                             const xmlChar *name);

Called when an entity reference is detected.

ctx: the user data (XML parser context)
name: The entity name

charactersSAXFunc ()

void        (*charactersSAXFunc)            (void *ctx,
                                             const xmlChar *ch,
                                             int len);

Receiving some chars from the parser.

ctx: the user data (XML parser context)
ch: a xmlChar string
len: the number of xmlChar

ignorableWhitespaceSAXFunc ()

void        (*ignorableWhitespaceSAXFunc)   (void *ctx,
                                             const xmlChar *ch,
                                             int len);

Receiving some ignorable whitespaces from the parser. UNUSED: by default the DOM building will use characters.

ctx: the user data (XML parser context)
ch: a xmlChar string
len: the number of xmlChar

processingInstructionSAXFunc ()

void        (*processingInstructionSAXFunc) (void *ctx,
                                             const xmlChar *target,
                                             const xmlChar *data);

A processing instruction has been parsed.

ctx: the user data (XML parser context)
target: the target name
data: the PI data

commentSAXFunc ()

void        (*commentSAXFunc)               (void *ctx,
                                             const xmlChar *value);

A comment has been parsed.

ctx: the user data (XML parser context)
value: the comment content

cdataBlockSAXFunc ()

void        (*cdataBlockSAXFunc)            (void *ctx,
                                             const xmlChar *value,
                                             int len);

Called when a CDATA block has been parsed.

ctx: the user data (XML parser context)
value: The CDATA content
len: the block length

warningSAXFunc ()

void        (*warningSAXFunc)               (void *ctx,
                                             const char *msg,
                                             ...);

Display and format a warning message, callback.

ctx: an XML parser context
msg: the message to display/transmit
...: extra parameters for the message display

errorSAXFunc ()

void        (*errorSAXFunc)                 (void *ctx,
                                             const char *msg,
                                             ...);

Display and format an error message, callback.

ctx: an XML parser context
msg: the message to display/transmit
...: extra parameters for the message display

fatalErrorSAXFunc ()

void        (*fatalErrorSAXFunc)            (void *ctx,
                                             const char *msg,
                                             ...);

Display and format fatal error messages, callback. Note: so far fatalError() SAX callbacks are not used; error() gets all the callbacks for errors.

ctx: an XML parser context
msg: the message to display/transmit
...: extra parameters for the message display

isStandaloneSAXFunc ()

int         (*isStandaloneSAXFunc)          (void *ctx);

Is this document tagged standalone?

ctx: the user data (XML parser context)
Returns :1 if true

hasInternalSubsetSAXFunc ()

int         (*hasInternalSubsetSAXFunc)     (void *ctx);

Does this document have an internal subset?

ctx: the user data (XML parser context)
Returns :1 if true

hasExternalSubsetSAXFunc ()

int         (*hasExternalSubsetSAXFunc)     (void *ctx);

Does this document have an external subset?

ctx: the user data (XML parser context)
Returns :1 if true

XML_SAX2_MAGIC

#define XML_SAX2_MAGIC 0xDEEDBEAF

Special constant found in the initialized field of SAX2 blocks.


startElementNsSAX2Func ()

void        (*startElementNsSAX2Func)       (void *ctx,
                                             const xmlChar *localname,
                                             const xmlChar *prefix,
                                             const xmlChar *URI,
                                             int nb_namespaces,
                                             const xmlChar **namespaces,
                                             int nb_attributes,
                                             int nb_defaulted,
                                             const xmlChar **attributes);

SAX2 callback when an element start has been detected by the parser. It provides the namespace information for the element, as well as the new namespace declarations on the element.

ctx: the user data (XML parser context)
localname: the local name of the element
prefix: the element namespace prefix if available
URI: the element namespace name if available
nb_namespaces: number of namespace definitions on that node
namespaces: pointer to the array of prefix/URI pairs namespace definitions
nb_attributes: the number of attributes on that node
nb_defaulted: the number of defaulted attributes. The defaulted ones are at the end of the array
attributes: pointer to the array of (localname/prefix/URI/value/end) attribute values.

endElementNsSAX2Func ()

void        (*endElementNsSAX2Func)         (void *ctx,
                                             const xmlChar *localname,
                                             const xmlChar *prefix,
                                             const xmlChar *URI);

SAX2 callback when an element end has been detected by the parser. It provides the namespace information for the element.

ctx: the user data (XML parser context)
localname: the local name of the element
prefix: the element namespace prefix if available
URI: the element namespace name if available

struct xmlSAXHandler

/*
 * Table of callbacks called by the parser while it processes the input.
 * The members internalSubset through externalSubset are function
 * pointers of the correspondingly named *SAXFunc callback types
 * documented in this file; the members after `initialized` are the
 * version 2 extensions.
 */
struct xmlSAXHandler {

    internalSubsetSAXFunc internalSubset;
    isStandaloneSAXFunc isStandalone;
    hasInternalSubsetSAXFunc hasInternalSubset;
    hasExternalSubsetSAXFunc hasExternalSubset;
    resolveEntitySAXFunc resolveEntity;
    getEntitySAXFunc getEntity;
    entityDeclSAXFunc entityDecl;
    notationDeclSAXFunc notationDecl;
    attributeDeclSAXFunc attributeDecl;
    elementDeclSAXFunc elementDecl;
    unparsedEntityDeclSAXFunc unparsedEntityDecl;
    setDocumentLocatorSAXFunc setDocumentLocator;
    startDocumentSAXFunc startDocument;
    endDocumentSAXFunc endDocument;
    startElementSAXFunc startElement;
    endElementSAXFunc endElement;
    referenceSAXFunc reference;
    charactersSAXFunc characters;
    ignorableWhitespaceSAXFunc ignorableWhitespace;
    processingInstructionSAXFunc processingInstruction;
    commentSAXFunc comment;
    warningSAXFunc warning;
    errorSAXFunc error;
    fatalErrorSAXFunc fatalError; /* unused error() get all the errors */
    getParameterEntitySAXFunc getParameterEntity;
    cdataBlockSAXFunc cdataBlock;
    externalSubsetSAXFunc externalSubset;
    /* NOTE(review): presumably holds XML_SAX2_MAGIC in SAX2 blocks -- confirm */
    unsigned int initialized;
    /* The following fields are extensions available only on version 2 */
    void *_private;
    startElementNsSAX2Func startElementNs; /* SAX2 element-start callback with namespace information */
    endElementNsSAX2Func endElementNs;     /* SAX2 element-end callback with namespace information */
    xmlStructuredErrorFunc serror;
};

A SAX handler is a bunch of callbacks called by the parser when processing of the input generates data or structure information.


struct xmlSAXHandlerV1

/*
 * Version 1 layout of the SAX callback table. Its members match, in name,
 * type and order, the members internalSubset through initialized of
 * struct xmlSAXHandler; it lacks the version 2 extension fields that
 * follow `initialized` there.
 */
struct xmlSAXHandlerV1 {

    internalSubsetSAXFunc internalSubset;
    isStandaloneSAXFunc isStandalone;
    hasInternalSubsetSAXFunc hasInternalSubset;
    hasExternalSubsetSAXFunc hasExternalSubset;
    resolveEntitySAXFunc resolveEntity;
    getEntitySAXFunc getEntity;
    entityDeclSAXFunc entityDecl;
    notationDeclSAXFunc notationDecl;
    attributeDeclSAXFunc attributeDecl;
    elementDeclSAXFunc elementDecl;
    unparsedEntityDeclSAXFunc unparsedEntityDecl;
    setDocumentLocatorSAXFunc setDocumentLocator;
    startDocumentSAXFunc startDocument;
    endDocumentSAXFunc endDocument;
    startElementSAXFunc startElement;
    endElementSAXFunc endElement;
    referenceSAXFunc reference;
    charactersSAXFunc characters;
    ignorableWhitespaceSAXFunc ignorableWhitespace;
    processingInstructionSAXFunc processingInstruction;
    commentSAXFunc comment;
    warningSAXFunc warning;
    errorSAXFunc error;
    fatalErrorSAXFunc fatalError; /* unused error() get all the errors */
    getParameterEntitySAXFunc getParameterEntity;
    cdataBlockSAXFunc cdataBlock;
    externalSubsetSAXFunc externalSubset;
    unsigned int initialized;
};


xmlSAXHandlerV1Ptr

typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr;


xmlExternalEntityLoader ()

xmlParserInputPtr (*xmlExternalEntityLoader)
                                            (const char *URL,
                                             const char *ID,
                                             xmlParserCtxtPtr context);

External entity loaders types.

URL: The System ID of the resource requested
ID: The Public ID of the resource requested
context: the XML parser context
Returns :the entity input parser.

xmlInitParser ()

void        xmlInitParser                   (void);

Initialization function for the XML parser. This is not reentrant. Call once before processing in case of use in multithreaded programs.


xmlCleanupParser ()

void        xmlCleanupParser                (void);

Cleanup function for the XML library. It tries to reclaim all parsing-related global memory allocated for the library processing. It doesn't deallocate any document-related memory. Calling this function should not prevent reusing the library, but one should call xmlCleanupParser() only when the process has finished using the library or XML documents built with it.


xmlParserInputRead ()

int         xmlParserInputRead              (xmlParserInputPtr in,
                                             int len);

This function refreshes the input for the parser. It doesn't try to preserve pointers to the input buffer, and discards already read data.

in: an XML parser input
len: an indicative size for the lookahead
Returns :the number of xmlChars read, or -1 in case of error; 0 indicates the end of this entity

xmlParserInputGrow ()

int         xmlParserInputGrow              (xmlParserInputPtr in,
                                             int len);

This function increases the input for the parser. It tries to preserve pointers to the input buffer, and keeps already read data.

in: an XML parser input
len: an indicative size for the lookahead
Returns :the number of xmlChars read, or -1 in case of error; 0 indicates the end of this entity

xmlStrdup ()

xmlChar*    xmlStrdup                       (const xmlChar *cur);

a strdup for array of xmlChar's. Since they are supposed to be encoded in UTF-8 or an encoding with 8bit based chars, we assume a termination mark of '0'.

cur: the input xmlChar *
Returns :a new xmlChar * or NULL

xmlStrndup ()

xmlChar*    xmlStrndup                      (const xmlChar *cur,
                                             int len);

a strndup for array of xmlChar's

cur: the input xmlChar *
len: the len of cur
Returns :a new xmlChar * or NULL

xmlCharStrndup ()

xmlChar*    xmlCharStrndup                  (const char *cur,
                                             int len);

a strndup for char's to xmlChar's

cur: the input char *
len: the len of cur
Returns :a new xmlChar * or NULL

xmlCharStrdup ()

xmlChar*    xmlCharStrdup                   (const char *cur);

a strdup for char's to xmlChar's

cur: the input char *
Returns :a new xmlChar * or NULL

xmlStrsub ()

xmlChar*    xmlStrsub                       (const xmlChar *str,
                                             int start,
                                             int len);

Extract a substring of a given string

str: the xmlChar * array (haystack)
start: the index of the first char (zero based)
len: the length of the substring
Returns :the xmlChar * for the extracted substring or NULL.

xmlStrchr ()

const xmlChar* xmlStrchr                    (const xmlChar *str,
                                             xmlChar val);

a strchr for xmlChar's

str: the xmlChar * array
val: the xmlChar to search
Returns :the xmlChar * for the first occurrence or NULL.

xmlStrstr ()

const xmlChar* xmlStrstr                    (const xmlChar *str,
                                             const xmlChar *val);

a strstr for xmlChar's

str: the xmlChar * array (haystack)
val: the xmlChar to search (needle)
Returns :the xmlChar * for the first occurrence or NULL.

xmlStrcasestr ()

const xmlChar* xmlStrcasestr                (const xmlChar *str,
                                             xmlChar *val);

a case-ignoring strstr for xmlChar's

str: the xmlChar * array (haystack)
val: the xmlChar to search (needle)
Returns :the xmlChar * for the first occurrence or NULL.

xmlStrcmp ()

int         xmlStrcmp                       (const xmlChar *str1,
                                             const xmlChar *str2);

a strcmp for xmlChar's

str1: the first xmlChar *
str2: the second xmlChar *
Returns :the integer result of the comparison

xmlStrncmp ()

int         xmlStrncmp                      (const xmlChar *str1,
                                             const xmlChar *str2,
                                             int len);

a strncmp for xmlChar's

str1: the first xmlChar *
str2: the second xmlChar *
len: the max comparison length
Returns :the integer result of the comparison

xmlStrcasecmp ()

int         xmlStrcasecmp                   (const xmlChar *str1,
                                             const xmlChar *str2);

a strcasecmp for xmlChar's

str1: the first xmlChar *
str2: the second xmlChar *
Returns :the integer result of the comparison

xmlStrncasecmp ()

int         xmlStrncasecmp                  (const xmlChar *str1,
                                             const xmlChar *str2,
                                             int len);

a strncasecmp for xmlChar's

str1: the first xmlChar *
str2: the second xmlChar *
len: the max comparison length
Returns :the integer result of the comparison

xmlStrEqual ()

int         xmlStrEqual                     (const xmlChar *str1,
                                             const xmlChar *str2);

Check if both strings are equal or have the same content. Should be a bit more readable and faster than xmlStrcmp().

str1: the first xmlChar *
str2: the second xmlChar *
Returns :1 if they are equal, 0 if they are different

xmlStrQEqual ()

int         xmlStrQEqual                    (const xmlChar *pref,
                                             const xmlChar *name,
                                             const xmlChar *str);

Check if a QName is Equal to a given string

pref: the prefix of the QName
name: the localname of the QName
str: the second xmlChar *
Returns :1 if they are equal, 0 if they are different

xmlStrlen ()

int         xmlStrlen                       (const xmlChar *str);

length of a xmlChar's string

str: the xmlChar * array
Returns :the number of xmlChar contained in the ARRAY.

xmlStrcat ()

xmlChar*    xmlStrcat                       (xmlChar *cur,
                                             const xmlChar *add);

a strcat for array of xmlChar's. Since they are supposed to be encoded in UTF-8 or an encoding with 8bit based chars, we assume a termination mark of '0'.

cur: the original xmlChar * array
add: the xmlChar * array added
Returns :a new xmlChar * containing the concatenated string.

xmlStrncat ()

xmlChar*    xmlStrncat                      (xmlChar *cur,
                                             const xmlChar *add,
                                             int len);

a strncat for array of xmlChar's, it will extend cur with the len first bytes of add.

cur: the original xmlChar * array
add: the xmlChar * array added
len: the length of add
Returns :a new xmlChar *, the original cur is reallocated if needed and should not be freed

xmlStrPrintf ()

int         xmlStrPrintf                    (xmlChar *buf,
                                             int len,
                                             const xmlChar *msg,
                                             ...);

Formats msg and places result into buf.

buf: the result buffer.
len: the result buffer length.
msg: the message with printf formatting.
...: extra parameters for the message.
Returns :the number of characters written to buf or -1 if an error occurs.

xmlStrVPrintf ()

int         xmlStrVPrintf                   (xmlChar *buf,
                                             int len,
                                             const xmlChar *msg,
                                             va_list ap);

Formats msg and places result into buf.

buf: the result buffer.
len: the result buffer length.
msg: the message with printf formatting.
ap: extra parameters for the message.
Returns :the number of characters written to buf or -1 if an error occurs.

xmlParseDoc ()

xmlDocPtr   xmlParseDoc                     (xmlChar *cur);

parse an XML in-memory document and build a tree.

cur: a pointer to an array of xmlChar
Returns :the resulting document tree

xmlParseMemory ()

xmlDocPtr   xmlParseMemory                  (const char *buffer,
                                             int size);

parse an XML in-memory block and build a tree.

buffer: a pointer to a char array
size: the size of the array
Returns :the resulting document tree

xmlParseFile ()

xmlDocPtr   xmlParseFile                    (const char *filename);

parse an XML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.

filename: the filename
Returns :the resulting document tree if the file was wellformed, NULL otherwise.

xmlSubstituteEntitiesDefault ()

int         xmlSubstituteEntitiesDefault    (int val);

Set and return the previous value for default entity support. Initially the parser always keeps entity references instead of substituting entity values in the output. This function has to be used to change the default parser behavior. SAX::substituteEntities() has to be used for changing that on a file-by-file basis.

val: int 0 or 1
Returns :the last value: 0 for no substitution, 1 for substitution.

xmlKeepBlanksDefault ()

int         xmlKeepBlanksDefault            (int val);

Set and return the previous value for default blanks text nodes support. The 1.x version of the parser used a heuristic to try to detect ignorable white spaces. As a result the SAX callback was generating xmlSAX2IgnorableWhitespace() callbacks instead of characters() ones, and when using the DOM output, text nodes containing those blanks were not generated. The 2.x and later versions switch to the XML standard way, and ignorableWhitespace() callbacks are only generated when running the parser in validating mode and when the current element doesn't allow CDATA or mixed content. This function is provided as a way to force the standard behavior on 1.X libs and to switch back to the old mode for compatibility when running 1.X client code on 2.X. Upgrade of 1.X code should be done by using the xmlIsBlankNode() commodity function to detect the "empty" nodes generated. This value also affects autogeneration of indentation when saving code: if blanks sections are kept, indentation is not generated.

val: int 0 or 1
Returns :the previous value (0 or 1).

xmlStopParser ()

void        xmlStopParser                   (xmlParserCtxtPtr ctxt);

Blocks further parser processing

ctxt: an XML parser context

xmlPedanticParserDefault ()

int         xmlPedanticParserDefault        (int val);

Set and return the previous value for enabling pedantic warnings.

val: int 0 or 1
Returns :the previous value (0 or 1).

xmlLineNumbersDefault ()

int         xmlLineNumbersDefault           (int val);

Set and return the previous value for enabling line numbers in elements contents. This may break old applications and is turned off by default.

val: int 0 or 1
Returns :the previous value (0 or 1).

xmlRecoverDoc ()

xmlDocPtr   xmlRecoverDoc                   (xmlChar *cur);

parse an XML in-memory document and build a tree. In the case the document is not Well Formed, a tree is built anyway

cur: a pointer to an array of xmlChar
Returns :the resulting document tree

xmlRecoverMemory ()

xmlDocPtr   xmlRecoverMemory                (const char *buffer,
                                             int size);

parse an XML in-memory block and build a tree. In the case the document is not Well Formed, a tree is built anyway

buffer: a pointer to a char array
size: the size of the array
Returns :the resulting document tree

xmlRecoverFile ()

xmlDocPtr   xmlRecoverFile                  (const char *filename);

parse an XML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. In the case the document is not Well Formed, a tree is built anyway

filename: the filename
Returns :the resulting document tree

xmlParseDocument ()

int         xmlParseDocument                (xmlParserCtxtPtr ctxt);

parse an XML document (and build a tree if using the standard SAX interface).

[1] document ::= prolog element Misc*

[22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?

ctxt: an XML parser context
Returns :0 on success, -1 in case of error. The parser context is augmented as a result of the parsing.

xmlParseExtParsedEnt ()

int         xmlParseExtParsedEnt            (xmlParserCtxtPtr ctxt);

Parse a general parsed entity. An external general parsed entity is well-formed if it matches the production labeled extParsedEnt.

[78] extParsedEnt ::= TextDecl? content

ctxt: an XML parser context
Returns :0 on success, -1 in case of error. The parser context is augmented as a result of the parsing.

xmlSAXParseDoc ()

xmlDocPtr   xmlSAXParseDoc                  (xmlSAXHandlerPtr sax,
                                             xmlChar *cur,
                                             int recovery);

Parse an XML in-memory document and build a tree. It uses the given SAX function block to handle the parsing callback. If sax is NULL, fall back to the default DOM tree building routines.

sax: the SAX handler block
cur: a pointer to an array of xmlChar
recovery: work in recovery mode, i.e. tries to read not Well Formed documents
Returns :the resulting document tree

xmlSAXUserParseFile ()

int         xmlSAXUserParseFile             (xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             const char *filename);

parse an XML file and call the given SAX handler routines. Automatic support for ZLIB/Compress compressed document is provided

sax: a SAX handler
user_data: The user data returned on SAX callbacks
filename: a file name
Returns :0 in case of success or an error number otherwise

xmlSAXUserParseMemory ()

int         xmlSAXUserParseMemory           (xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             const char *buffer,
                                             int size);

A better SAX parsing routine. parse an XML in-memory buffer and call the given SAX handler routines.

sax: a SAX handler
user_data: The user data returned on SAX callbacks
buffer: an in-memory XML document input
size: the length of the XML document in bytes
Returns :0 in case of success or an error number otherwise

xmlSAXParseMemory ()

xmlDocPtr   xmlSAXParseMemory               (xmlSAXHandlerPtr sax,
                                             const char *buffer,
                                             int size,
                                             int recovery);

parse an XML in-memory block and use the given SAX function block to handle the parsing callback. If sax is NULL, fallback to the default DOM tree building routines.

sax: the SAX handler block
buffer: a pointer to a char array
size: the size of the array
recovery: work in recovery mode, i.e. tries to read not Well Formed documents
Returns :the resulting document tree

xmlSAXParseMemoryWithData ()

xmlDocPtr   xmlSAXParseMemoryWithData       (xmlSAXHandlerPtr sax,
                                             const char *buffer,
                                             int size,
                                             int recovery,
                                             void *data);

parse an XML in-memory block and use the given SAX function block to handle the parsing callback. If sax is NULL, fallback to the default DOM tree building routines.

User data (void *) is stored within the parser context in the context's _private member, so it is available nearly everywhere in libxml

sax: the SAX handler block
buffer: a pointer to a char array
size: the size of the array
recovery: work in recovery mode, i.e. tries to read not Well Formed documents
data: the userdata
Returns :the resulting document tree

xmlSAXParseFile ()

xmlDocPtr   xmlSAXParseFile                 (xmlSAXHandlerPtr sax,
                                             const char *filename,
                                             int recovery);

parse an XML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, it falls back to the default DOM tree building routines.

sax: the SAX handler block
filename: the filename
recovery: work in recovery mode, i.e. tries to read not Well Formed documents
Returns :the resulting document tree

xmlSAXParseFileWithData ()

xmlDocPtr   xmlSAXParseFileWithData         (xmlSAXHandlerPtr sax,
                                             const char *filename,
                                             int recovery,
                                             void *data);

parse an XML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, it falls back to the default DOM tree building routines.

User data (void *) is stored within the parser context in the context's _private member, so it is available nearly everywhere in libxml

sax: the SAX handler block
filename: the filename
recovery: work in recovery mode, i.e. tries to read not Well Formed documents
data: the userdata
Returns :the resulting document tree

xmlSAXParseEntity ()

xmlDocPtr   xmlSAXParseEntity               (xmlSAXHandlerPtr sax,
                                             const char *filename);

parse an XML external entity out of context and build a tree. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, it falls back to the default DOM tree building routines.

[78] extParsedEnt ::= TextDecl? content

This corresponds to a "Well Balanced" chunk

sax: the SAX handler block
filename: the filename
Returns :the resulting document tree

xmlParseEntity ()

xmlDocPtr   xmlParseEntity                  (const char *filename);

parse an XML external entity out of context and build a tree.

[78] extParsedEnt ::= TextDecl? content

This corresponds to a "Well Balanced" chunk

filename: the filename
Returns :the resulting document tree

xmlParseDTD ()

xmlDtdPtr   xmlParseDTD                     (const xmlChar *ExternalID,
                                             const xmlChar *SystemID);

Load and parse an external subset.

ExternalID: a NAME* containing the External ID of the DTD
SystemID: a NAME* containing the URL to the DTD
Returns :the resulting xmlDtdPtr or NULL in case of error.

xmlSAXParseDTD ()

xmlDtdPtr   xmlSAXParseDTD                  (xmlSAXHandlerPtr sax,
                                             const xmlChar *ExternalID,
                                             const xmlChar *SystemID);

Load and parse an external subset.

sax: the SAX handler block
ExternalID: a NAME* containing the External ID of the DTD
SystemID: a NAME* containing the URL to the DTD
Returns :the resulting xmlDtdPtr or NULL in case of error.

xmlIOParseDTD ()

xmlDtdPtr   xmlIOParseDTD                   (xmlSAXHandlerPtr sax,
                                             xmlParserInputBufferPtr input,
                                             xmlCharEncoding enc);

Load and parse a DTD

sax: the SAX handler block or NULL
input: an Input Buffer
enc: the charset encoding if known
Returns :the resulting xmlDtdPtr or NULL in case of error. input will be freed at parsing end.

xmlParseBalancedChunkMemory ()

int         xmlParseBalancedChunkMemory     (xmlDocPtr doc,
                                             xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             int depth,
                                             const xmlChar *string,
                                             xmlNodePtr *lst);

Parse a well-balanced chunk of an XML document called by the parser. The allowed sequence for the Well Balanced Chunk is the one defined by the content production in the XML grammar:

[43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*

doc: the document the chunk pertains to
sax: the SAX handler block (possibly NULL)
user_data: The user data returned on SAX callbacks (possibly NULL)
depth: Used for loop detection, use 0
string: the input string in UTF8 or ISO-Latin (zero terminated)
lst: the return value for the set of parsed nodes
Returns :0 if the chunk is well balanced, -1 in case of args problem and the parser error code otherwise

xmlParseBalancedChunkMemoryRecover ()

int         xmlParseBalancedChunkMemoryRecover
                                            (xmlDocPtr doc,
                                             xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             int depth,
                                             const xmlChar *string,
                                             xmlNodePtr *lst,
                                             int recover);

Parse a well-balanced chunk of an XML document called by the parser. The allowed sequence for the Well Balanced Chunk is the one defined by the content production in the XML grammar:

[43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*

doc: the document the chunk pertains to
sax: the SAX handler block (possibly NULL)
user_data: The user data returned on SAX callbacks (possibly NULL)
depth: Used for loop detection, use 0
string: the input string in UTF8 or ISO-Latin (zero terminated)
lst: the return value for the set of parsed nodes
recover: return nodes even if the data is broken (use 0)
Returns :0 if the chunk is well balanced, -1 in case of args problem and the parser error code otherwise In case recover is set to 1, the nodelist will not be empty even if the parsed chunk is not well balanced.

xmlParseExternalEntity ()

int         xmlParseExternalEntity          (xmlDocPtr doc,
                                             xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             int depth,
                                             const xmlChar *URL,
                                             const xmlChar *ID,
                                             xmlNodePtr *lst);

Parse an external general entity. An external general parsed entity is well-formed if it matches the production labeled extParsedEnt.

[78] extParsedEnt ::= TextDecl? content

doc: the document the chunk pertains to
sax: the SAX handler block (possibly NULL)
user_data: The user data returned on SAX callbacks (possibly NULL)
depth: Used for loop detection, use 0
URL: the URL for the entity to load
ID: the System ID for the entity to load
lst: the return value for the set of parsed nodes
Returns :0 if the entity is well formed, -1 in case of args problem and the parser error code otherwise

xmlParseCtxtExternalEntity ()

int         xmlParseCtxtExternalEntity      (xmlParserCtxtPtr ctx,
                                             const xmlChar *URL,
                                             const xmlChar *ID,
                                             xmlNodePtr *lst);

Parse an external general entity within an existing parsing context. An external general parsed entity is well-formed if it matches the production labeled extParsedEnt.

[78] extParsedEnt ::= TextDecl? content

ctx: the existing parsing context
URL: the URL for the entity to load
ID: the System ID for the entity to load
lst: the return value for the set of parsed nodes
Returns :0 if the entity is well formed, -1 in case of args problem and the parser error code otherwise

xmlNewParserCtxt ()

xmlParserCtxtPtr xmlNewParserCtxt           (void);

Allocate and initialize a new parser context.

Returns :the xmlParserCtxtPtr or NULL

xmlInitParserCtxt ()

int         xmlInitParserCtxt               (xmlParserCtxtPtr ctxt);

Initialize a parser context

ctxt: an XML parser context
Returns :0 in case of success and -1 in case of error

xmlClearParserCtxt ()

void        xmlClearParserCtxt              (xmlParserCtxtPtr ctxt);

Clear (release owned resources) and reinitialize a parser context

ctxt: an XML parser context

xmlFreeParserCtxt ()

void        xmlFreeParserCtxt               (xmlParserCtxtPtr ctxt);

Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.

ctxt: an XML parser context

xmlSetupParserForBuffer ()

void        xmlSetupParserForBuffer         (xmlParserCtxtPtr ctxt,
                                             const xmlChar *buffer,
                                             const char *filename);

Setup the parser context to parse a new buffer; clears any prior contents from the parser context. The buffer parameter must not be NULL, but the filename parameter can be NULL.

ctxt: an XML parser context
buffer: a xmlChar * buffer
filename: a file name

xmlCreateDocParserCtxt ()

xmlParserCtxtPtr xmlCreateDocParserCtxt     (const xmlChar *cur);

Creates a parser context for an XML in-memory document.

cur: a pointer to an array of xmlChar
Returns :the new parser context or NULL

xmlGetFeaturesList ()

int         xmlGetFeaturesList              (int *len,
                                             const char **result);

Copy at most *len feature names into the result array

len: the length of the features name array (input/output)
result: an array of strings to be filled with the feature names.
Returns :-1 in case of error, or the total number of features. len is updated with the number of strings copied; the strings must not be deallocated

xmlGetFeature ()

int         xmlGetFeature                   (xmlParserCtxtPtr ctxt,
                                             const char *name,
                                             void *result);

Read the current value of one feature of this parser instance

ctxt: an XML/HTML parser context
name: the feature name
result: location to store the result
Returns :-1 in case of error, 0 otherwise

xmlSetFeature ()

int         xmlSetFeature                   (xmlParserCtxtPtr ctxt,
                                             const char *name,
                                             void *value);

Change the current value of one feature of this parser instance

ctxt: an XML/HTML parser context
name: the feature name
value: pointer to the location of the new value
Returns :-1 in case of error, 0 otherwise

xmlCreatePushParserCtxt ()

xmlParserCtxtPtr xmlCreatePushParserCtxt    (xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             const char *chunk,
                                             int size,
                                             const char *filename);

Create a parser context for using the XML parser in push mode. If chunk is non-NULL and size is non-zero, the data is used to detect the encoding. The remaining characters will be parsed so they don't need to be fed in again through xmlParseChunk. To allow content encoding detection, size should be >= 4. The value of filename is used for fetching external entities and error/warning reports.

sax: a SAX handler
user_data: The user data returned on SAX callbacks
chunk: a pointer to an array of chars
size: number of chars in the array
filename: an optional file name or URI
Returns :the new parser context or NULL

xmlParseChunk ()

int         xmlParseChunk                   (xmlParserCtxtPtr ctxt,
                                             const char *chunk,
                                             int size,
                                             int terminate);

Parse a Chunk of memory

ctxt: an XML parser context
chunk: a char array
size: the size in bytes of the chunk
terminate: last chunk indicator
Returns :zero if no error, the xmlParserErrors otherwise.

xmlCreateIOParserCtxt ()

xmlParserCtxtPtr xmlCreateIOParserCtxt      (xmlSAXHandlerPtr sax,
                                             void *user_data,
                                             xmlInputReadCallback ioread,
                                             xmlInputCloseCallback ioclose,
                                             void *ioctx,
                                             xmlCharEncoding enc);

Create a parser context for using the XML parser with an existing I/O stream

sax: a SAX handler
user_data: The user data returned on SAX callbacks
ioread: an I/O read function
ioclose: an I/O close function
ioctx: an I/O handler
enc: the charset encoding if known
Returns :the new parser context or NULL

xmlNewIOInputStream ()

xmlParserInputPtr xmlNewIOInputStream       (xmlParserCtxtPtr ctxt,
                                             xmlParserInputBufferPtr input,
                                             xmlCharEncoding enc);

Create a new input stream structure encapsulating the input into a stream suitable for the parser.

ctxt: an XML parser context
input: an I/O Input
enc: the charset encoding if known
Returns :the new input stream or NULL

xmlParserFindNodeInfo ()

const xmlParserNodeInfo* xmlParserFindNodeInfo
                                            (const xmlParserCtxtPtr ctxt,
                                             const xmlNodePtr node);

Find the parser node info struct for a given node

ctxt: an XML parser context
node: an XML node within the tree
Returns :an xmlParserNodeInfo block pointer or NULL

xmlInitNodeInfoSeq ()

void        xmlInitNodeInfoSeq              (xmlParserNodeInfoSeqPtr seq);

Initialize (set to initial state) node info sequence

seq: a node info sequence pointer

xmlClearNodeInfoSeq ()

void        xmlClearNodeInfoSeq             (xmlParserNodeInfoSeqPtr seq);

Clear (release memory and reinitialize) node info sequence

seq: a node info sequence pointer

xmlParserAddNodeInfo ()

void        xmlParserAddNodeInfo            (xmlParserCtxtPtr ctxt,
                                             const xmlParserNodeInfoPtr info);

Insert node info record into the sorted sequence

ctxt: an XML parser context
info: a pointer to the node info record to insert

xmlSetExternalEntityLoader ()

void        xmlSetExternalEntityLoader      (xmlExternalEntityLoader f);

Changes the default external entity resolver function for the application

f: the new entity resolver function

xmlGetExternalEntityLoader ()

xmlExternalEntityLoader xmlGetExternalEntityLoader
                                            (void);

Get the default external entity resolver function for the application

Returns :the xmlExternalEntityLoader function pointer

xmlLoadExternalEntity ()

xmlParserInputPtr xmlLoadExternalEntity     (const char *URL,
                                             const char *ID,
                                             xmlParserCtxtPtr ctxt);

Load an external entity. Note that the use of this function for unparsed entities may generate problems. TODO: a more generic External entity API must be designed.

URL: the URL for the entity to load
ID: the Public ID for the entity to load
ctxt: the context in which the entity is called or NULL
Returns :the xmlParserInputPtr or NULL

enum xmlParserOption

typedef enum {
    XML_PARSE_RECOVER	= 1<<0,	/* recover on errors */
    XML_PARSE_NOENT	= 1<<1,	/* substitute entities */
    XML_PARSE_DTDLOAD	= 1<<2,	/* load the external subset */
    XML_PARSE_DTDATTR	= 1<<3,	/* default DTD attributes */
    XML_PARSE_DTDVALID	= 1<<4,	/* validate with the DTD */
    XML_PARSE_NOERROR	= 1<<5,	/* suppress error reports */
    XML_PARSE_NOWARNING	= 1<<6,	/* suppress warning reports */
    XML_PARSE_PEDANTIC	= 1<<7,	/* pedantic error reporting */
    XML_PARSE_NOBLANKS	= 1<<8,	/* remove blank nodes */
    XML_PARSE_SAX1	= 1<<9,	/* use the SAX1 interface internally */
    XML_PARSE_XINCLUDE	= 1<<10,/* Implement XInclude substitution  */
    XML_PARSE_NONET	= 1<<11,/* Forbid network access */
    XML_PARSE_NODICT	= 1<<12,/* Do not reuse the context dictionary */
    XML_PARSE_NSCLEAN	= 1<<13,/* remove redundant namespace declarations */
    XML_PARSE_NOCDATA	= 1<<14 /* merge CDATA as text nodes */
} xmlParserOption;

This is the set of XML parser options that can be passed down to the xmlReadDoc() and similar calls.


xmlCtxtReset ()

void        xmlCtxtReset                    (xmlParserCtxtPtr ctxt);

Reset a parser context

ctxt: an XML parser context

xmlCtxtResetPush ()

int         xmlCtxtResetPush                (xmlParserCtxtPtr ctxt,
                                             const char *chunk,
                                             int size,
                                             const char *filename,
                                             const char *encoding);

Reset a push parser context

ctxt: an XML parser context
chunk: a pointer to an array of chars
size: number of chars in the array
filename: an optional file name or URI
encoding: the document encoding, or NULL
Returns :0 in case of success and 1 in case of error

xmlCtxtUseOptions ()

int         xmlCtxtUseOptions               (xmlParserCtxtPtr ctxt,
                                             int options);

Applies the options to the parser context

ctxt: an XML parser context
options: a combination of xmlParserOption(s)
Returns :0 in case of success, the set of unknown or unimplemented options in case of error.

xmlReadDoc ()

xmlDocPtr   xmlReadDoc                      (const xmlChar *cur,
                                             const char *URL,
                                             const char *encoding,
                                             int options);

parse an XML in-memory document and build a tree.

cur: a pointer to a zero terminated string
URL: the base URL to use for the document
encoding: the document encoding, or NULL
options: a combination of xmlParserOption(s)
Returns :the resulting document tree

xmlReadFile ()

xmlDocPtr   xmlReadFile                     (const char *URL,
                                             const char *encoding,
                                             int options);

parse an XML file from the filesystem or the network.

URL: a file or URL
encoding: the document encoding, or NULL
options: a combination of xmlParserOption(s)
Returns :the resulting document tree

xmlReadMemory ()

xmlDocPtr   xmlReadMemory                   (const char *buffer,
                                             int size,
                                             const char *URL,
                                             const char *encoding,
                                             int options);

parse an XML in-memory document and build a tree.

buffer: a pointer to a char array
size: the size of the array
URL: the base URL to use for the document
encoding: the document encoding, or NULL
options: a combination of xmlParserOption(s)
Returns :the resulting document tree

xmlReadFd ()

xmlDocPtr   xmlReadFd                       (int fd,
                                             const char *URL,
                                             const char *encoding,
                                             int options);

parse an XML from a file descriptor and build a tree.

fd: an open file descriptor
URL: the base URL to use for the document
encoding: the document encoding, or NULL
options: a combination of xmlParserOption(s)
Returns :the resulting document tree

xmlReadIO ()

xmlDocPtr   xmlReadIO                       (xmlInputReadCallback ioread,
                                             xmlInputCloseCallback ioclose,
                                             void *ioctx,
                                             const char *URL,
                                             const char *encoding,
                                             int options);

parse an XML document from I/O functions and source and build a tree.

ioread: an I/O read function
ioclose: an I/O close function
ioctx: an I/O handler
URL: the base URL to use for the document
encoding: the document encoding, or NULL
options: a combination of xmlParserOption(s)
Returns :the resulting document tree

xmlCtxtReadDoc ()

xmlDocPtr   xmlCtxtReadDoc                  (xmlParserCtxtPtr ctxt,
                                             const xmlChar *cur,
                                             const char *URL,
                                             const char *encoding,
                                             int options);

parse an XML in-memory document and build a tree. This reuses the existing ctxt parser context

ctxt: an XML parser context
cur: a pointer to a zero terminated string
URL: the base URL to use for the document
encoding: the document encoding, or NULL
options: a combination of xmlParserOption(s)
Returns :the resulting document tree

xmlCtxtReadFile ()

xmlDocPtr   xmlCtxtReadFile                 (xmlParserCtxtPtr ctxt,
                                             const char *filename,
                                             const char *encoding,
                                             int options);

parse an XML file from the filesystem or the network. This reuses the existing ctxt parser context

ctxt: an XML parser context
filename: a file or URL
encoding: the document encoding, or NULL
options: a combination of xmlParserOption(s)
Returns :the resulting document tree

xmlCtxtReadMemory ()

xmlDocPtr   xmlCtxtReadMemory               (xmlParserCtxtPtr ctxt,
                                             const char *buffer,
                                             int size,
                                             const char *URL,
                                             const char *encoding,
                                             int options);

parse an XML in-memory document and build a tree. This reuses the existing ctxt parser context

ctxt: an XML parser context
buffer: a pointer to a char array
size: the size of the array
URL: the base URL to use for the document
encoding: the document encoding, or NULL
options: a combination of xmlParserOption(s)
Returns :the resulting document tree

xmlCtxtReadFd ()

xmlDocPtr   xmlCtxtReadFd                   (xmlParserCtxtPtr ctxt,
                                             int fd,
                                             const char *URL,
                                             const char *encoding,
                                             int options);

parse an XML from a file descriptor and build a tree. This reuses the existing ctxt parser context

ctxt: an XML parser context
fd: an open file descriptor
URL: the base URL to use for the document
encoding: the document encoding, or NULL
options: a combination of xmlParserOption(s)
Returns :the resulting document tree

xmlCtxtReadIO ()

xmlDocPtr   xmlCtxtReadIO                   (xmlParserCtxtPtr ctxt,
                                             xmlInputReadCallback ioread,
                                             xmlInputCloseCallback ioclose,
                                             void *ioctx,
                                             const char *URL,
                                             const char *encoding,
                                             int options);

parse an XML document from I/O functions and source and build a tree. This reuses the existing ctxt parser context

ctxt: an XML parser context
ioread: an I/O read function
ioclose: an I/O close function
ioctx: an I/O handler
URL: the base URL to use for the document
encoding: the document encoding, or NULL
options: a combination of xmlParserOption(s)
Returns :the resulting document tree