parserInternals

parserInternals —

Synopsis




extern      unsigned int xmlParserMaxDepth;
#define     XML_MAX_NAMELEN
#define     INPUT_CHUNK
#define     IS_BYTE_CHAR                    (c)
#define     IS_CHAR                         (c)
#define     IS_CHAR_CH                      (c)
#define     IS_BLANK                        (c)
#define     IS_BLANK_CH                     (c)
#define     IS_BASECHAR                     (c)
#define     IS_DIGIT                        (c)
#define     IS_DIGIT_CH                     (c)
#define     IS_COMBINING                    (c)
#define     IS_COMBINING_CH                 (c)
#define     IS_EXTENDER                     (c)
#define     IS_EXTENDER_CH                  (c)
#define     IS_IDEOGRAPHIC                  (c)
#define     IS_LETTER                       (c)
#define     IS_LETTER_CH                    (c)
#define     IS_PUBIDCHAR                    (c)
#define     IS_PUBIDCHAR_CH                 (c)
#define     SKIP_EOL                        (p)
#define     MOVETO_ENDTAG                   (p)
#define     MOVETO_STARTTAG                 (p)
#define     XML_SUBSTITUTE_NONE
#define     XML_SUBSTITUTE_REF
#define     XML_SUBSTITUTE_PEREF
#define     XML_SUBSTITUTE_BOTH
void        (*xmlEntityReferenceFunc)       (xmlEntityPtr ent,
                                             xmlNodePtr firstNode,
                                             xmlNodePtr lastNode);

Description

Details

xmlParserMaxDepth

extern unsigned int xmlParserMaxDepth;

Arbitrary depth limit for the XML documents that we allow to be processed. This is not a limitation of the parser but a safety boundary feature.


XML_MAX_NAMELEN

#define XML_MAX_NAMELEN 100

Identifiers can be longer, but this will be more costly at runtime.


INPUT_CHUNK

#define INPUT_CHUNK	250

The parser always tries to have that amount of input ready. One of the reasons is to provide context when reporting errors.


IS_BYTE_CHAR()

#define IS_BYTE_CHAR(c)	 xmlIsChar_ch(c)

Macro to check the following production in the XML spec:

[2] Char ::= x9 | xA | xD | [x20...] any byte character in the accepted range

c: a byte value (int)

IS_CHAR()

#define IS_CHAR(c)   xmlIsCharQ(c)

Macro to check the following production in the XML spec:

[2] Char ::= x9 | xA | xD | [x20-xD7FF] | [xE000-xFFFD] | [x10000-x10FFFF] any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.

c: a Unicode value (int)

IS_CHAR_CH()

#define IS_CHAR_CH(c)  xmlIsChar_ch(c)

Behaves like IS_CHAR on a single-byte value

c: an xmlChar (usually an unsigned char)

IS_BLANK()

#define IS_BLANK(c)  xmlIsBlankQ(c)

Macro to check the following production in the XML spec:

[3] S ::= (x20 | x9 | xD | xA)+

c: a Unicode value (int)

IS_BLANK_CH()

#define IS_BLANK_CH(c)  xmlIsBlank_ch(c)

Behaves the same as IS_BLANK

c: an xmlChar value (normally unsigned char)

IS_BASECHAR()

#define IS_BASECHAR(c) xmlIsBaseCharQ(c)

Macro to check the following production in the XML spec:

[85] BaseChar ::= ... long list see REC ...

c: a Unicode value (int)

IS_DIGIT()

#define IS_DIGIT(c) xmlIsDigitQ(c)

Macro to check the following production in the XML spec:

[88] Digit ::= ... long list see REC ...

c: a Unicode value (int)

IS_DIGIT_CH()

#define IS_DIGIT_CH(c)  xmlIsDigit_ch(c)

Behaves like IS_DIGIT but with a single byte argument

c: an xmlChar value (usually an unsigned char)

IS_COMBINING()

#define IS_COMBINING(c) xmlIsCombiningQ(c)

Macro to check the following production in the XML spec:

[87] CombiningChar ::= ... long list see REC ...

c: a Unicode value (int)

IS_COMBINING_CH()

#define IS_COMBINING_CH(c) 0 

Always false (all combining chars > 0xff)

c: an xmlChar (usually an unsigned char)

IS_EXTENDER()

#define IS_EXTENDER(c) xmlIsExtenderQ(c)

Macro to check the following production in the XML spec:

[89] Extender ::= x00B7 | x02D0 | x02D1 | x0387 | x0640 | x0E46 | x0EC6 | x3005 | [x3031-x3035] | [x309D-x309E] | [x30FC-x30FE]

c: a Unicode value (int)

IS_EXTENDER_CH()

#define IS_EXTENDER_CH(c)  xmlIsExtender_ch(c)

Behaves like IS_EXTENDER but with a single-byte argument

c: an xmlChar value (usually an unsigned char)

IS_IDEOGRAPHIC()

#define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c)

Macro to check the following production in the XML spec:

[86] Ideographic ::= [x4E00-x9FA5] | x3007 | [x3021-x3029]

c: a Unicode value (int)

IS_LETTER()

#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))

Macro to check the following production in the XML spec:

[84] Letter ::= BaseChar | Ideographic

c: a Unicode value (int)

IS_LETTER_CH()

#define IS_LETTER_CH(c) xmlIsBaseChar_ch(c)

Macro behaves like IS_LETTER, but only checks base chars

c: an xmlChar value (normally unsigned char)

IS_PUBIDCHAR()

#define IS_PUBIDCHAR(c)	xmlIsPubidCharQ(c)

Macro to check the following production in the XML spec:

[13] PubidChar ::= x20 | xD | xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

c: a Unicode value (int)

IS_PUBIDCHAR_CH()

#define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c)

Same as IS_PUBIDCHAR but for a single-byte value

c: an xmlChar value (normally unsigned char)

SKIP_EOL()

#define     SKIP_EOL(p)

Skips the end of line chars.

p: a UTF-8 string pointer

MOVETO_ENDTAG()

#define     MOVETO_ENDTAG(p)

Skips to the next '>' char.

p: a UTF-8 string pointer

MOVETO_STARTTAG()

#define     MOVETO_STARTTAG(p)

Skips to the next '<' char.

p: a UTF-8 string pointer

XML_SUBSTITUTE_NONE

#define XML_SUBSTITUTE_NONE	0

If no entities need to be substituted.


XML_SUBSTITUTE_REF

#define XML_SUBSTITUTE_REF	1

Whether general entities need to be substituted.


XML_SUBSTITUTE_PEREF

#define XML_SUBSTITUTE_PEREF	2

Whether parameter entities need to be substituted.


XML_SUBSTITUTE_BOTH

#define XML_SUBSTITUTE_BOTH 	3

Both general and parameter entities need to be substituted.


xmlEntityReferenceFunc ()

void        (*xmlEntityReferenceFunc)       (xmlEntityPtr ent,
                                             xmlNodePtr firstNode,
                                             xmlNodePtr lastNode);

Callback function used when one needs to be able to track back the provenance of a chunk of nodes inherited from an entity replacement.

ent: the entity
firstNode: the first node in the chunk
lastNode: the last node in the chunk