Details
>htmlParserCtxt
typedef xmlParserCtxt htmlParserCtxt; |
>htmlParserCtxtPtr
typedef xmlParserCtxtPtr htmlParserCtxtPtr; |
>htmlParserNodeInfo
typedef xmlParserNodeInfo htmlParserNodeInfo; |
>htmlSAXHandler
typedef xmlSAXHandler htmlSAXHandler; |
>htmlSAXHandlerPtr
typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; |
>htmlParserInput
typedef xmlParserInput htmlParserInput; |
>htmlParserInputPtr
typedef xmlParserInputPtr htmlParserInputPtr; |
>htmlDocPtr
typedef xmlDocPtr htmlDocPtr; |
>htmlNodePtr
typedef xmlNodePtr htmlNodePtr; |
>struct htmlElemDesc
struct htmlElemDesc {
const char *name; /* The tag name */
char startTag; /* Whether the start tag can be implied */
char endTag; /* Whether the end tag can be implied */
char saveEndTag; /* Whether the end tag should be saved */
char empty; /* Is this an empty element ? */
char depr; /* Is this a deprecated element ? */
char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
char isinline; /* is this a block 0 or inline 1 element */
const char *desc; /* the description */
}; |
>htmlElemDescPtr
typedef htmlElemDesc *htmlElemDescPtr; |
>struct htmlEntityDesc
struct htmlEntityDesc {
unsigned int value; /* the UNICODE value for the character */
const char *name; /* The entity name */
const char *desc; /* the description */
}; |
>htmlEntityDescPtr
typedef htmlEntityDesc *htmlEntityDescPtr; |
>htmlTagLookup ()
Look up the HTML tag in the ElementTable.
>htmlEntityLookup ()
Look up the given entity in EntitiesTable.
TODO: the linear scan is really ugly; a hash table is really needed.
>htmlEntityValueLookup ()
Look up the given entity in EntitiesTable.
TODO: the linear scan is really ugly; a hash table is really needed.
>htmlIsAutoClosed ()
The HTML DTD allows a tag to implicitly close other tags.
The list is kept in the htmlStartClose array. This function checks
if a tag is autoclosed by one of its children.
>htmlAutoCloseTag ()
The HTML DTD allows a tag to implicitly close other tags.
The list is kept in the htmlStartClose array. This function checks
if the element or one of its children would autoclose the
given tag.
>htmlParseEntityRef ()
Parse an HTML entity reference.
[68] EntityRef ::= '&' Name ';'
>htmlParseCharRef ()
parse Reference declarations
[66] CharRef ::= '&#' [0-9]+ ';' |
'&#x' [0-9a-fA-F]+ ';'
>htmlParseElement ()
parse an HTML element, this is highly recursive
[39] element ::= EmptyElemTag | STag content ETag
[41] Attribute ::= Name Eq AttValue
>htmlSAXParseDoc ()
Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
to handle parse events. If sax is NULL, fallback to the default DOM
behavior and return a tree.
>htmlParseDoc ()
parse an HTML in-memory document and build a tree.
>htmlSAXParseFile ()
Parse an HTML file and build a tree. Automatic support for ZLIB/Compress
compressed documents is provided by default if found at compile-time.
It uses the given SAX function block to handle the parsing callbacks.
If sax is NULL, fall back to the default DOM tree building routines.
>htmlParseFile ()
htmlDocPtr htmlParseFile (const char *filename,
const char *encoding); |
parse an HTML file and build a tree. Automatic support for ZLIB/Compress
compressed document is provided by default if found at compile-time.
>UTF8ToHtml ()
int UTF8ToHtml (unsigned char *out,
int *outlen,
unsigned char *in,
int *inlen); |
Take a block of UTF-8 chars in and try to convert it to an ASCII
plus HTML entities block of chars out.
>htmlEncodeEntities ()
int htmlEncodeEntities (unsigned char *out,
int *outlen,
unsigned char *in,
int *inlen,
int quoteChar); |
Take a block of UTF-8 chars in and try to convert it to an ASCII
plus HTML entities block of chars out.
>htmlIsScriptAttribute ()
int htmlIsScriptAttribute (const xmlChar *name); |
Check if an attribute is of content type Script
>htmlHandleOmittedElem ()
int htmlHandleOmittedElem (int val); |
Set and return the previous value for handling HTML omitted tags.
>htmlFreeParserCtxt ()
Free all the memory used by a parser context. However the parsed
document in ctxt->myDoc is not freed.
>htmlCreatePushParserCtxt ()
Create a parser context for using the HTML parser in push mode
To allow content encoding detection, size should be >= 4
The value of filename is used for fetching external entities
and error/warning reports.
>htmlParseChunk ()
int htmlParseChunk (htmlParserCtxtPtr ctxt,
const char *chunk,
int size,
int terminate); |
Parse a chunk of memory.