mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-02-27 17:57:22 +03:00
BUG FIXED #2784 HTML parsing/output improvements Rebuilt, updated the docs Improvement of regression scripts, make testall should look clean Released as 1.7.4
1528 lines
21 KiB
HTML
1528 lines
21 KiB
HTML
<HTML
|
|
><HEAD
|
|
><TITLE
|
|
>HTMLparser</TITLE
|
|
><META
|
|
NAME="GENERATOR"
|
|
CONTENT="Modular DocBook HTML Stylesheet Version 1.33"><LINK
|
|
REL="HOME"
|
|
TITLE="Gnome XML Library Reference Manual"
|
|
HREF="book1.html"><LINK
|
|
REL="UP"
|
|
TITLE="Libxml Library Reference"
|
|
HREF="libxml-lib.html"><LINK
|
|
REL="PREVIOUS"
|
|
TITLE="xml-error"
|
|
HREF="gnome-xml-xml-error.html"><LINK
|
|
REL="NEXT"
|
|
TITLE="HTMLtree"
|
|
HREF="gnome-xml-htmltree.html"></HEAD
|
|
><BODY
|
|
BGCOLOR="#FFFFFF"
|
|
TEXT="#000000"
|
|
><DIV
|
|
CLASS="NAVHEADER"
|
|
><TABLE
|
|
WIDTH="100%"
|
|
BORDER="0"
|
|
BGCOLOR="#000000"
|
|
CELLPADDING="1"
|
|
CELLSPACING="0"
|
|
><TR
|
|
><TH
|
|
COLSPAN="4"
|
|
ALIGN="center"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="5"
|
|
>Gnome XML Library Reference Manual</FONT
|
|
></TH
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="25%"
|
|
BGCOLOR="#C00000"
|
|
ALIGN="left"
|
|
><A
|
|
HREF="gnome-xml-xml-error.html"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
>&lt;&lt;&lt; Previous Page</B
|
|
></FONT
|
|
></A
|
|
></TD
|
|
><TD
|
|
WIDTH="25%"
|
|
BGCOLOR="#0000C0"
|
|
ALIGN="center"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
><A
|
|
HREF="book1.html"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
>Home</B
|
|
></FONT
|
|
></A
|
|
></B
|
|
></FONT
|
|
></TD
|
|
><TD
|
|
WIDTH="25%"
|
|
BGCOLOR="#00C000"
|
|
ALIGN="center"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
><A
|
|
HREF="libxml-lib.html"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
>Up</B
|
|
></FONT
|
|
></A
|
|
></B
|
|
></FONT
|
|
></TD
|
|
><TD
|
|
WIDTH="25%"
|
|
BGCOLOR="#C00000"
|
|
ALIGN="right"
|
|
><A
|
|
HREF="gnome-xml-htmltree.html"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
>Next Page &gt;&gt;&gt;</B
|
|
></FONT
|
|
></A
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
></DIV
|
|
><H1
|
|
>HTMLparser</H1
|
|
><DIV
|
|
CLASS="REFNAMEDIV"
|
|
><A
|
|
NAME="AEN6306"
|
|
></A
|
|
><H2
|
|
>Name</H2
|
|
>HTMLparser — </DIV
|
|
><DIV
|
|
CLASS="REFSYNOPSISDIV"
|
|
><A
|
|
NAME="AEN6309"
|
|
></A
|
|
><H2
|
|
>Synopsis</H2
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="SYNOPSIS"
|
|
>
|
|
|
|
typedef <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSERCTXT"
|
|
>htmlParserCtxt</A
|
|
>;
|
|
typedef <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSERCTXTPTR"
|
|
>htmlParserCtxtPtr</A
|
|
>;
|
|
typedef <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSERNODEINFO"
|
|
>htmlParserNodeInfo</A
|
|
>;
|
|
typedef <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLSAXHANDLER"
|
|
>htmlSAXHandler</A
|
|
>;
|
|
typedef <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLSAXHANDLERPTR"
|
|
>htmlSAXHandlerPtr</A
|
|
>;
|
|
typedef <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSERINPUT"
|
|
>htmlParserInput</A
|
|
>;
|
|
typedef <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSERINPUTPTR"
|
|
>htmlParserInputPtr</A
|
|
>;
|
|
typedef <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
|
|
>htmlDocPtr</A
|
|
>;
|
|
typedef <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLNODEPTR"
|
|
>htmlNodePtr</A
|
|
>;
|
|
<GTKDOCLINK
|
|
HREF="HTMLELEMDESCPTR"
|
|
>htmlElemDescPtr</GTKDOCLINK
|
|
> <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLTAGLOOKUP"
|
|
>htmlTagLookup</A
|
|
> (const <A
|
|
HREF="gnome-xml-tree.html#XMLCHAR"
|
|
>xmlChar</A
|
|
> *tag);
|
|
<GTKDOCLINK
|
|
HREF="HTMLENTITYDESCPTR"
|
|
>htmlEntityDescPtr</GTKDOCLINK
|
|
> <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLENTITYLOOKUP"
|
|
>htmlEntityLookup</A
|
|
> (const <A
|
|
HREF="gnome-xml-tree.html#XMLCHAR"
|
|
>xmlChar</A
|
|
> *name);
|
|
<GTKDOCLINK
|
|
HREF="HTMLENTITYDESCPTR"
|
|
>htmlEntityDescPtr</GTKDOCLINK
|
|
> <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSEENTITYREF"
|
|
>htmlParseEntityRef</A
|
|
> (<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSERCTXTPTR"
|
|
>htmlParserCtxtPtr</A
|
|
> ctxt,
|
|
<A
|
|
HREF="gnome-xml-tree.html#XMLCHAR"
|
|
>xmlChar</A
|
|
> **str);
|
|
int <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSECHARREF"
|
|
>htmlParseCharRef</A
|
|
> (<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSERCTXTPTR"
|
|
>htmlParserCtxtPtr</A
|
|
> ctxt);
|
|
void <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSEELEMENT"
|
|
>htmlParseElement</A
|
|
> (<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSERCTXTPTR"
|
|
>htmlParserCtxtPtr</A
|
|
> ctxt);
|
|
<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
|
|
>htmlDocPtr</A
|
|
> <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLSAXPARSEDOC"
|
|
>htmlSAXParseDoc</A
|
|
> (<A
|
|
HREF="gnome-xml-tree.html#XMLCHAR"
|
|
>xmlChar</A
|
|
> *cur,
|
|
const char *encoding,
|
|
<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLSAXHANDLERPTR"
|
|
>htmlSAXHandlerPtr</A
|
|
> sax,
|
|
void *userData);
|
|
<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
|
|
>htmlDocPtr</A
|
|
> <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSEDOC"
|
|
>htmlParseDoc</A
|
|
> (<A
|
|
HREF="gnome-xml-tree.html#XMLCHAR"
|
|
>xmlChar</A
|
|
> *cur,
|
|
const char *encoding);
|
|
<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
|
|
>htmlDocPtr</A
|
|
> <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLSAXPARSEFILE"
|
|
>htmlSAXParseFile</A
|
|
> (const char *filename,
|
|
const char *encoding,
|
|
<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLSAXHANDLERPTR"
|
|
>htmlSAXHandlerPtr</A
|
|
> sax,
|
|
void *userData);
|
|
<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
|
|
>htmlDocPtr</A
|
|
> <A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSEFILE"
|
|
>htmlParseFile</A
|
|
> (const char *filename,
|
|
const char *encoding);</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
></DIV
|
|
><DIV
|
|
CLASS="REFSECT1"
|
|
><A
|
|
NAME="AEN6347"
|
|
></A
|
|
><H2
|
|
>Description</H2
|
|
><P
|
|
></P
|
|
></DIV
|
|
><DIV
|
|
CLASS="REFSECT1"
|
|
><A
|
|
NAME="AEN6350"
|
|
></A
|
|
><H2
|
|
>Details</H2
|
|
><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6352"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLPARSERCTXT"
|
|
></A
|
|
>htmlParserCtxt</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
>typedef xmlParserCtxt htmlParserCtxt;</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6357"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLPARSERCTXTPTR"
|
|
></A
|
|
>htmlParserCtxtPtr</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
>typedef xmlParserCtxtPtr htmlParserCtxtPtr;</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6362"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLPARSERNODEINFO"
|
|
></A
|
|
>htmlParserNodeInfo</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
>typedef xmlParserNodeInfo htmlParserNodeInfo;</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6367"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLSAXHANDLER"
|
|
></A
|
|
>htmlSAXHandler</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
>typedef xmlSAXHandler htmlSAXHandler;</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6372"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLSAXHANDLERPTR"
|
|
></A
|
|
>htmlSAXHandlerPtr</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
>typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6377"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLPARSERINPUT"
|
|
></A
|
|
>htmlParserInput</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
>typedef xmlParserInput htmlParserInput;</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6382"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLPARSERINPUTPTR"
|
|
></A
|
|
>htmlParserInputPtr</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
>typedef xmlParserInputPtr htmlParserInputPtr;</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6387"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLDOCPTR"
|
|
></A
|
|
>htmlDocPtr</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
>typedef xmlDocPtr htmlDocPtr;</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6392"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLNODEPTR"
|
|
></A
|
|
>htmlNodePtr</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
>typedef xmlNodePtr htmlNodePtr;</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6397"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLTAGLOOKUP"
|
|
></A
|
|
>htmlTagLookup ()</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
><GTKDOCLINK
|
|
HREF="HTMLELEMDESCPTR"
|
|
>htmlElemDescPtr</GTKDOCLINK
|
|
> htmlTagLookup (const <A
|
|
HREF="gnome-xml-tree.html#XMLCHAR"
|
|
>xmlChar</A
|
|
> *tag);</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
>Lookup the HTML tag in the ElementTable</P
|
|
><P
|
|
></P
|
|
><DIV
|
|
CLASS="INFORMALTABLE"
|
|
><P
|
|
></P
|
|
><TABLE
|
|
BORDER="0"
|
|
WIDTH="100%"
|
|
BGCOLOR="#FFD0D0"
|
|
CELLSPACING="0"
|
|
CELLPADDING="4"
|
|
CLASS="CALSTABLE"
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>tag</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> The tag name</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><I
|
|
CLASS="EMPHASIS"
|
|
>Returns</I
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
>the related htmlElemDescPtr or NULL if not found.</TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6418"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLENTITYLOOKUP"
|
|
></A
|
|
>htmlEntityLookup ()</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
><GTKDOCLINK
|
|
HREF="HTMLENTITYDESCPTR"
|
|
>htmlEntityDescPtr</GTKDOCLINK
|
|
> htmlEntityLookup (const <A
|
|
HREF="gnome-xml-tree.html#XMLCHAR"
|
|
>xmlChar</A
|
|
> *name);</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
>Lookup the given entity in EntitiesTable</P
|
|
><P
|
|
>TODO: the linear scan is really ugly, a hash table is really needed.</P
|
|
><P
|
|
></P
|
|
><DIV
|
|
CLASS="INFORMALTABLE"
|
|
><P
|
|
></P
|
|
><TABLE
|
|
BORDER="0"
|
|
WIDTH="100%"
|
|
BGCOLOR="#FFD0D0"
|
|
CELLSPACING="0"
|
|
CELLPADDING="4"
|
|
CLASS="CALSTABLE"
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>name</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> the entity name</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><I
|
|
CLASS="EMPHASIS"
|
|
>Returns</I
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
>the associated htmlEntityDescPtr if found, NULL otherwise.</TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6440"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLPARSEENTITYREF"
|
|
></A
|
|
>htmlParseEntityRef ()</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
><GTKDOCLINK
|
|
HREF="HTMLENTITYDESCPTR"
|
|
>htmlEntityDescPtr</GTKDOCLINK
|
|
> htmlParseEntityRef (<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSERCTXTPTR"
|
|
>htmlParserCtxtPtr</A
|
|
> ctxt,
|
|
<A
|
|
HREF="gnome-xml-tree.html#XMLCHAR"
|
|
>xmlChar</A
|
|
> **str);</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
>parse an HTML ENTITY reference</P
|
|
><P
|
|
>[68] EntityRef ::= '&amp;' Name ';'</P
|
|
><P
|
|
></P
|
|
><DIV
|
|
CLASS="INFORMALTABLE"
|
|
><P
|
|
></P
|
|
><TABLE
|
|
BORDER="0"
|
|
WIDTH="100%"
|
|
BGCOLOR="#FFD0D0"
|
|
CELLSPACING="0"
|
|
CELLPADDING="4"
|
|
CLASS="CALSTABLE"
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>ctxt</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> an HTML parser context</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>str</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> location to store the entity name</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><I
|
|
CLASS="EMPHASIS"
|
|
>Returns</I
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
>the associated htmlEntityDescPtr if found, or NULL otherwise,
|
|
if non-NULL, *str will have to be freed by the caller.</TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6467"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLPARSECHARREF"
|
|
></A
|
|
>htmlParseCharRef ()</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
>int htmlParseCharRef (<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSERCTXTPTR"
|
|
>htmlParserCtxtPtr</A
|
|
> ctxt);</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
>parse Reference declarations</P
|
|
><P
|
|
>[66] CharRef ::= '&amp;#' [0-9]+ ';' |
|
|
'&amp;#x' [0-9a-fA-F]+ ';'</P
|
|
><P
|
|
></P
|
|
><DIV
|
|
CLASS="INFORMALTABLE"
|
|
><P
|
|
></P
|
|
><TABLE
|
|
BORDER="0"
|
|
WIDTH="100%"
|
|
BGCOLOR="#FFD0D0"
|
|
CELLSPACING="0"
|
|
CELLPADDING="4"
|
|
CLASS="CALSTABLE"
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>ctxt</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> an HTML parser context</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><I
|
|
CLASS="EMPHASIS"
|
|
>Returns</I
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
>the value parsed (as an int)</TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6489"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLPARSEELEMENT"
|
|
></A
|
|
>htmlParseElement ()</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
>void htmlParseElement (<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLPARSERCTXTPTR"
|
|
>htmlParserCtxtPtr</A
|
|
> ctxt);</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
>parse an HTML element, this is highly recursive</P
|
|
><P
|
|
>[39] element ::= EmptyElemTag | STag content ETag</P
|
|
><P
|
|
>[41] Attribute ::= Name Eq AttValue</P
|
|
><P
|
|
></P
|
|
><DIV
|
|
CLASS="INFORMALTABLE"
|
|
><P
|
|
></P
|
|
><TABLE
|
|
BORDER="0"
|
|
WIDTH="100%"
|
|
BGCOLOR="#FFD0D0"
|
|
CELLSPACING="0"
|
|
CELLPADDING="4"
|
|
CLASS="CALSTABLE"
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>ctxt</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> an HTML parser context</TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6507"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLSAXPARSEDOC"
|
|
></A
|
|
>htmlSAXParseDoc ()</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
><A
|
|
HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
|
|
>htmlDocPtr</A
|
|
> htmlSAXParseDoc (<A
|
|
HREF="gnome-xml-tree.html#XMLCHAR"
|
|
>xmlChar</A
|
|
> *cur,
|
|
const char *encoding,
|
|
<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLSAXHANDLERPTR"
|
|
>htmlSAXHandlerPtr</A
|
|
> sax,
|
|
void *userData);</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
>parse an HTML in-memory document and build a tree.
|
|
It uses the given SAX function block to handle the parsing callbacks.
|
|
If sax is NULL, fall back to the default DOM tree building routines.</P
|
|
><P
|
|
></P
|
|
><DIV
|
|
CLASS="INFORMALTABLE"
|
|
><P
|
|
></P
|
|
><TABLE
|
|
BORDER="0"
|
|
WIDTH="100%"
|
|
BGCOLOR="#FFD0D0"
|
|
CELLSPACING="0"
|
|
CELLPADDING="4"
|
|
CLASS="CALSTABLE"
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>cur</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> a pointer to an array of xmlChar</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>encoding</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> a free form C string describing the HTML document encoding, or NULL</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>sax</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> the SAX handler block</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>userData</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> if using SAX, this pointer will be provided on callbacks. </TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><I
|
|
CLASS="EMPHASIS"
|
|
>Returns</I
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
>the resulting document tree</TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6541"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLPARSEDOC"
|
|
></A
|
|
>htmlParseDoc ()</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
><A
|
|
HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
|
|
>htmlDocPtr</A
|
|
> htmlParseDoc (<A
|
|
HREF="gnome-xml-tree.html#XMLCHAR"
|
|
>xmlChar</A
|
|
> *cur,
|
|
const char *encoding);</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
>parse an HTML in-memory document and build a tree.</P
|
|
><P
|
|
></P
|
|
><DIV
|
|
CLASS="INFORMALTABLE"
|
|
><P
|
|
></P
|
|
><TABLE
|
|
BORDER="0"
|
|
WIDTH="100%"
|
|
BGCOLOR="#FFD0D0"
|
|
CELLSPACING="0"
|
|
CELLPADDING="4"
|
|
CLASS="CALSTABLE"
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>cur</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> a pointer to an array of xmlChar</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>encoding</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> a free form C string describing the HTML document encoding, or NULL</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><I
|
|
CLASS="EMPHASIS"
|
|
>Returns</I
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
>the resulting document tree</TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6566"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLSAXPARSEFILE"
|
|
></A
|
|
>htmlSAXParseFile ()</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
><A
|
|
HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
|
|
>htmlDocPtr</A
|
|
> htmlSAXParseFile (const char *filename,
|
|
const char *encoding,
|
|
<A
|
|
HREF="gnome-xml-htmlparser.html#HTMLSAXHANDLERPTR"
|
|
>htmlSAXHandlerPtr</A
|
|
> sax,
|
|
void *userData);</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
>parse an HTML file and build a tree. Automatic support for ZLIB/Compress
|
|
compressed documents is provided by default if found at compile time.
|
|
It uses the given SAX function block to handle the parsing callbacks.
|
|
If sax is NULL, fall back to the default DOM tree building routines.</P
|
|
><P
|
|
></P
|
|
><DIV
|
|
CLASS="INFORMALTABLE"
|
|
><P
|
|
></P
|
|
><TABLE
|
|
BORDER="0"
|
|
WIDTH="100%"
|
|
BGCOLOR="#FFD0D0"
|
|
CELLSPACING="0"
|
|
CELLPADDING="4"
|
|
CLASS="CALSTABLE"
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>filename</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> the filename</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>encoding</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> a free form C string describing the HTML document encoding, or NULL</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>sax</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> the SAX handler block</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>userData</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> if using SAX, this pointer will be provided on callbacks. </TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><I
|
|
CLASS="EMPHASIS"
|
|
>Returns</I
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
>the resulting document tree</TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
></DIV
|
|
><HR><DIV
|
|
CLASS="REFSECT2"
|
|
><A
|
|
NAME="AEN6599"
|
|
></A
|
|
><H3
|
|
><A
|
|
NAME="HTMLPARSEFILE"
|
|
></A
|
|
>htmlParseFile ()</H3
|
|
><TABLE
|
|
BORDER="0"
|
|
BGCOLOR="#D6E8FF"
|
|
WIDTH="100%"
|
|
CELLPADDING="6"
|
|
><TR
|
|
><TD
|
|
><PRE
|
|
CLASS="PROGRAMLISTING"
|
|
><A
|
|
HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
|
|
>htmlDocPtr</A
|
|
> htmlParseFile (const char *filename,
|
|
const char *encoding);</PRE
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
>parse an HTML file and build a tree. Automatic support for ZLIB/Compress
|
|
compressed documents is provided by default if found at compile time.</P
|
|
><P
|
|
></P
|
|
><DIV
|
|
CLASS="INFORMALTABLE"
|
|
><P
|
|
></P
|
|
><TABLE
|
|
BORDER="0"
|
|
WIDTH="100%"
|
|
BGCOLOR="#FFD0D0"
|
|
CELLSPACING="0"
|
|
CELLPADDING="4"
|
|
CLASS="CALSTABLE"
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>filename</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> the filename</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><TT
|
|
CLASS="PARAMETER"
|
|
><I
|
|
>encoding</I
|
|
></TT
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
> a free form C string describing the HTML document encoding, or NULL</TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
WIDTH="20%"
|
|
ALIGN="RIGHT"
|
|
VALIGN="TOP"
|
|
><I
|
|
CLASS="EMPHASIS"
|
|
>Returns</I
|
|
> :</TD
|
|
><TD
|
|
WIDTH="80%"
|
|
ALIGN="LEFT"
|
|
VALIGN="TOP"
|
|
>the resulting document tree</TD
|
|
></TR
|
|
></TABLE
|
|
><P
|
|
></P
|
|
></DIV
|
|
></DIV
|
|
></DIV
|
|
><DIV
|
|
CLASS="NAVFOOTER"
|
|
><BR
|
|
CLEAR="all"><BR><TABLE
|
|
WIDTH="100%"
|
|
BORDER="0"
|
|
BGCOLOR="#000000"
|
|
CELLPADDING="1"
|
|
CELLSPACING="0"
|
|
><TR
|
|
><TD
|
|
WIDTH="25%"
|
|
BGCOLOR="#C00000"
|
|
ALIGN="left"
|
|
><A
|
|
HREF="gnome-xml-xml-error.html"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
>&lt;&lt;&lt; Previous Page</B
|
|
></FONT
|
|
></A
|
|
></TD
|
|
><TD
|
|
WIDTH="25%"
|
|
BGCOLOR="#0000C0"
|
|
ALIGN="center"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
><A
|
|
HREF="book1.html"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
>Home</B
|
|
></FONT
|
|
></A
|
|
></B
|
|
></FONT
|
|
></TD
|
|
><TD
|
|
WIDTH="25%"
|
|
BGCOLOR="#00C000"
|
|
ALIGN="center"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
><A
|
|
HREF="libxml-lib.html"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
>Up</B
|
|
></FONT
|
|
></A
|
|
></B
|
|
></FONT
|
|
></TD
|
|
><TD
|
|
WIDTH="25%"
|
|
BGCOLOR="#C00000"
|
|
ALIGN="right"
|
|
><A
|
|
HREF="gnome-xml-htmltree.html"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
>Next Page &gt;&gt;&gt;</B
|
|
></FONT
|
|
></A
|
|
></TD
|
|
></TR
|
|
><TR
|
|
><TD
|
|
COLSPAN="2"
|
|
ALIGN="left"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
>xml-error</B
|
|
></FONT
|
|
></TD
|
|
><TD
|
|
COLSPAN="2"
|
|
ALIGN="right"
|
|
><FONT
|
|
COLOR="#FFFFFF"
|
|
SIZE="3"
|
|
><B
|
|
>HTMLtree</B
|
|
></FONT
|
|
></TD
|
|
></TR
|
|
></TABLE
|
|
></DIV
|
|
></BODY
|
|
></HTML
|
|
> |