1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-10-26 20:25:14 +03:00
libxml2/HTMLparser.h
Daniel Veillard a6d8eb6256 Finally had a bit of time to resynch both trees:
- HTMLparser.[ch]: added a way to avoid adding automatically
  omitted tags. htmlHandleOmittedElem() allows to change the
  default handling.
- tree.[ch] xmllint.c: added xmlDocDumpFormatMemory() and
  xmlDocDumpFormatMemoryEnc(), uses memory functions for output
  of xmllint too when using --memory flag, added a memory test
  suite at the Makefile level.
- xpathInternals.h xpath.[ch] xpointer.c: fixed problems
  with namespace use when encountering QNames in XPath evalation,
  added xmlns() scheme in XPointer.
- nanoftp.c : incorporated a fix
- parser.c xmlIO.c: fixed problems raised with encoding when using
  the memory I/O
- parserInternals.c: closed bug 25934 reported by
  torsten.landschoff@innominate.de
- TODO: updated
Daniel
2000-12-27 10:46:47 +00:00

115 lines
3.2 KiB
C

/*
* HTMLparser.h : inf=terface for an HTML 4.0 non-verifying parser
*
* See Copyright for the status of this software.
*
* Daniel.Veillard@w3.org
*/
#ifndef __HTML_PARSER_H__
#define __HTML_PARSER_H__
#include <libxml/parser.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Most of the back-end structures from XML and HTML are shared
*/
typedef xmlParserCtxt htmlParserCtxt;
typedef xmlParserCtxtPtr htmlParserCtxtPtr;
typedef xmlParserNodeInfo htmlParserNodeInfo;
typedef xmlSAXHandler htmlSAXHandler;
typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
typedef xmlParserInput htmlParserInput;
typedef xmlParserInputPtr htmlParserInputPtr;
typedef xmlDocPtr htmlDocPtr;
typedef xmlNodePtr htmlNodePtr;
/*
* Internal description of an HTML element
*/
typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;
struct _htmlElemDesc {
const char *name; /* The tag name */
int startTag; /* Whether the start tag can be implied */
int endTag; /* Whether the end tag can be implied */
int empty; /* Is this an empty element ? */
int depr; /* Is this a deprecated element ? */
int dtd; /* 1: only in Loose DTD, 2: only Frameset one */
const char *desc; /* the description */
};
/*
* Internal description of an HTML entity
*/
typedef struct _htmlEntityDesc htmlEntityDesc;
typedef htmlEntityDesc *htmlEntityDescPtr;
struct _htmlEntityDesc {
int value; /* the UNICODE value for the character */
const char *name; /* The entity name */
const char *desc; /* the description */
};
/*
* There is only few public functions.
*/
htmlElemDescPtr htmlTagLookup (const xmlChar *tag);
htmlEntityDescPtr htmlEntityLookup(const xmlChar *name);
htmlEntityDescPtr htmlEntityValueLookup(int value);
int htmlIsAutoClosed(htmlDocPtr doc,
htmlNodePtr elem);
int htmlAutoCloseTag(htmlDocPtr doc,
const xmlChar *name,
htmlNodePtr elem);
htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt,
xmlChar **str);
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
void htmlParseElement(htmlParserCtxtPtr ctxt);
htmlDocPtr htmlSAXParseDoc (xmlChar *cur,
const char *encoding,
htmlSAXHandlerPtr sax,
void *userData);
htmlDocPtr htmlParseDoc (xmlChar *cur,
const char *encoding);
htmlDocPtr htmlSAXParseFile(const char *filename,
const char *encoding,
htmlSAXHandlerPtr sax,
void *userData);
htmlDocPtr htmlParseFile (const char *filename,
const char *encoding);
int UTF8ToHtml (unsigned char* out,
int *outlen,
const unsigned char* in,
int *inlen);
int htmlEncodeEntities(unsigned char* out,
int *outlen,
const unsigned char* in,
int *inlen, int quoteChar);
int htmlIsScriptAttribute(const xmlChar *name);
int htmlHandleOmittedElem(int val);
/**
* Interfaces for the Push mode
*/
void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
void *user_data,
const char *chunk,
int size,
const char *filename,
xmlCharEncoding enc);
int htmlParseChunk (htmlParserCtxtPtr ctxt,
const char *chunk,
int size,
int terminate);
#ifdef __cplusplus
}
#endif
#endif /* __HTML_PARSER_H__ */