mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-02-05 05:47:00 +03:00
more cleanup of the HTML parser to force it to not bypass SAX, Daniel.
Ready for 2.1.1 it seems
This commit is contained in:
parent
3f6f7f64ce
commit
d83eb8212e
@ -1,3 +1,8 @@
|
||||
Fri Jun 30 20:29:08 MEST 2000
|
||||
|
||||
* HTMLparser.c HTMLtree.c SAX.c valid.c tree.h : more cleanup
|
||||
of the HTML parser to force it to not bypass SAX
|
||||
|
||||
Fri Jun 30 11:19:59 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||
|
||||
* win32config.h.in: updated
|
||||
|
35
HTMLparser.c
35
HTMLparser.c
@ -618,7 +618,7 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *new) {
|
||||
*/
|
||||
void
|
||||
htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *new) {
|
||||
if (!strcmp(new, "html"))
|
||||
if (!xmlStrcmp(new, BAD_CAST"html"))
|
||||
return;
|
||||
if (ctxt->nameNr <= 0) {
|
||||
#ifdef DEBUG
|
||||
@ -628,12 +628,15 @@ htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *new) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
|
||||
ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
|
||||
}
|
||||
if ((!strcmp(new, "body")) || (!strcmp(new, "head")))
|
||||
if ((!xmlStrcmp(new, BAD_CAST"body")) || (!xmlStrcmp(new, BAD_CAST"head")))
|
||||
return;
|
||||
if (ctxt->nameNr <= 1) {
|
||||
if ((!strcmp(new, "script")) || (!strcmp(new, "style")) ||
|
||||
(!strcmp(new, "meta")) || (!strcmp(new, "link")) ||
|
||||
(!strcmp(new, "title")) || (!strcmp(new, "base"))) {
|
||||
if ((!xmlStrcmp(new, BAD_CAST"script")) ||
|
||||
(!xmlStrcmp(new, BAD_CAST"style")) ||
|
||||
(!xmlStrcmp(new, BAD_CAST"meta")) ||
|
||||
(!xmlStrcmp(new, BAD_CAST"link")) ||
|
||||
(!xmlStrcmp(new, BAD_CAST"title")) ||
|
||||
(!xmlStrcmp(new, BAD_CAST"base"))) {
|
||||
/*
|
||||
* dropped OBJECT ... if you put it first BODY will be
|
||||
* assumed !
|
||||
@ -2152,17 +2155,15 @@ htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
|
||||
ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
|
||||
ctxt->wellFormed = 0;
|
||||
/* We shouldn't try to resynchronize ... */
|
||||
} else {
|
||||
}
|
||||
NEXT;
|
||||
|
||||
/*
|
||||
* Create the document accordingly to the DOCTYPE
|
||||
* Create or update the document according to the DOCTYPE
|
||||
*/
|
||||
if (ctxt->myDoc != NULL)
|
||||
xmlFreeDoc(ctxt->myDoc);
|
||||
|
||||
ctxt->myDoc = htmlNewDoc(URI, ExternalID);
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
|
||||
(!ctxt->disableSAX))
|
||||
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
|
||||
|
||||
/*
|
||||
* Cleanup, since we don't use all those identifiers
|
||||
@ -2845,13 +2846,6 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
|
||||
}
|
||||
SKIP_BLANKS;
|
||||
|
||||
/*
|
||||
* Create the document if not done already.
|
||||
*/
|
||||
if (ctxt->myDoc == NULL) {
|
||||
ctxt->myDoc = htmlNewDoc(NULL, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Time to start parsing the tree itself
|
||||
*/
|
||||
@ -3171,6 +3165,10 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
||||
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
|
||||
ctxt->sax->setDocumentLocator(ctxt->userData,
|
||||
&xmlDefaultSAXLocator);
|
||||
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
|
||||
(!ctxt->disableSAX))
|
||||
ctxt->sax->startDocument(ctxt->userData);
|
||||
|
||||
cur = in->cur[0];
|
||||
next = in->cur[1];
|
||||
if ((cur == '<') && (next == '!') &&
|
||||
@ -3190,7 +3188,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
||||
fprintf(stderr, "HPP: entering PROLOG\n");
|
||||
#endif
|
||||
} else {
|
||||
ctxt->myDoc = htmlNewDoc(NULL, NULL);
|
||||
ctxt->instate = XML_PARSER_MISC;
|
||||
}
|
||||
#ifdef DEBUG_PUSH
|
||||
|
@ -158,6 +158,8 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
|
||||
/*
|
||||
* Special cases.
|
||||
*/
|
||||
if (cur->type == XML_DTD_NODE)
|
||||
return;
|
||||
if (cur->type == XML_HTML_DOCUMENT_NODE) {
|
||||
htmlDocContentDump(buf, (xmlDocPtr) cur);
|
||||
return;
|
||||
|
18
SAX.c
18
SAX.c
@ -25,6 +25,7 @@
|
||||
#include <libxml/xmlIO.h>
|
||||
#include <libxml/SAX.h>
|
||||
#include <libxml/uri.h>
|
||||
#include <libxml/HTMLtree.h>
|
||||
|
||||
/* #define DEBUG_SAX */
|
||||
/* #define DEBUG_SAX_TREE */
|
||||
@ -157,11 +158,22 @@ internalSubset(void *ctx, const xmlChar *name,
|
||||
const xmlChar *ExternalID, const xmlChar *SystemID)
|
||||
{
|
||||
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
|
||||
xmlDtdPtr dtd;
|
||||
#ifdef DEBUG_SAX
|
||||
fprintf(stderr, "SAX.internalSubset(%s, %s, %s)\n",
|
||||
name, ExternalID, SystemID);
|
||||
#endif
|
||||
xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID);
|
||||
|
||||
if (ctxt->myDoc == NULL)
|
||||
return;
|
||||
dtd = xmlGetIntSubset(ctxt->myDoc);
|
||||
if (dtd != NULL) {
|
||||
xmlUnlinkNode((xmlNodePtr) dtd);
|
||||
xmlFreeDtd(dtd);
|
||||
ctxt->myDoc->intSubset = NULL;
|
||||
}
|
||||
ctxt->myDoc->intSubset =
|
||||
xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1485,7 +1497,7 @@ xmlDefaultSAXHandlerInit(void)
|
||||
* Default handler for HTML, builds the DOM tree
|
||||
*/
|
||||
xmlSAXHandler htmlDefaultSAXHandler = {
|
||||
NULL,
|
||||
internalSubset,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
@ -1522,7 +1534,7 @@ xmlSAXHandler htmlDefaultSAXHandler = {
|
||||
void
|
||||
htmlDefaultSAXHandlerInit(void)
|
||||
{
|
||||
htmlDefaultSAXHandler.internalSubset = NULL;
|
||||
htmlDefaultSAXHandler.internalSubset = internalSubset;
|
||||
htmlDefaultSAXHandler.externalSubset = NULL;
|
||||
htmlDefaultSAXHandler.isStandalone = NULL;
|
||||
htmlDefaultSAXHandler.hasInternalSubset = NULL;
|
||||
|
@ -48,7 +48,7 @@ mail</a>:</p>
|
||||
Use <strong>xmlDocGetRootElement(doc)</strong> to get the root element of
|
||||
a document. Alternatively if you are sure to not reference Dtds nor have
|
||||
PIs or comments before or after the root element s/->root/->children/g
|
||||
will probably do it. </li>
|
||||
will probably do it.</li>
|
||||
<li>The white space issue, this one is more complex, unless special case of
|
||||
validating parsing, the line breaks and spaces usually used for indenting
|
||||
and formatting the document content become significant. So they are
|
||||
@ -90,7 +90,7 @@ They offers the following:</p>
|
||||
<strong>#include<libxml/...></strong> in both cases.</li>
|
||||
<li>similar identifiers defined via macros for the child and root fields:
|
||||
respectively <strong>xmlChildrenNode</strong> and
|
||||
<strong>xmlRootNode</strong> </li>
|
||||
<strong>xmlRootNode</strong></li>
|
||||
<li>a new macro <strong>LIBXML_TEST_VERSION</strong> which should be
|
||||
inserted once in the client code</li>
|
||||
</ol>
|
||||
@ -118,7 +118,7 @@ following:</p>
|
||||
<strong>LIBXML_TEST_VERSION</strong> is a fine place).</li>
|
||||
</ol>
|
||||
|
||||
<p>Following those 3 steps should work. It worked for some of my own code.</p>
|
||||
<p>Following those steps should work. It worked for some of my own code.</p>
|
||||
|
||||
<p>Let me put some emphasis on the fact that there are far more changes from
|
||||
libxml 1.x to 2.x than the ones you may have to patch for. The overall code
|
||||
@ -128,6 +128,6 @@ upgrade, it may cost a lot on the long term ...</p>
|
||||
|
||||
<p><a href="mailto:Daniel.Veillard@w3.org">Daniel Veillard</a></p>
|
||||
|
||||
<p>$Id: upgrade.html,v 1.5 2000/05/06 08:11:18 veillard Exp $</p>
|
||||
<p>$Id: upgrade.html,v 1.6 2000/06/29 00:43:26 veillard Exp $</p>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -414,6 +414,7 @@ xmlDtdPtr xmlNewDtd (xmlDocPtr doc,
|
||||
const xmlChar *name,
|
||||
const xmlChar *ExternalID,
|
||||
const xmlChar *SystemID);
|
||||
xmlDtdPtr xmlGetIntSubset (xmlDocPtr doc);
|
||||
void xmlFreeDtd (xmlDtdPtr cur);
|
||||
xmlNsPtr xmlNewGlobalNs (xmlDocPtr doc,
|
||||
const xmlChar *href,
|
||||
|
1
tree.h
1
tree.h
@ -414,6 +414,7 @@ xmlDtdPtr xmlNewDtd (xmlDocPtr doc,
|
||||
const xmlChar *name,
|
||||
const xmlChar *ExternalID,
|
||||
const xmlChar *SystemID);
|
||||
xmlDtdPtr xmlGetIntSubset (xmlDocPtr doc);
|
||||
void xmlFreeDtd (xmlDtdPtr cur);
|
||||
xmlNsPtr xmlNewGlobalNs (xmlDocPtr doc,
|
||||
const xmlChar *href,
|
||||
|
3
valid.c
3
valid.c
@ -2031,6 +2031,9 @@ xmlIsRef(xmlDocPtr doc, xmlNodePtr elem, xmlAttrPtr attr) {
|
||||
((attr->name[1] == 'D') || (attr->name[1] == 'd')) &&
|
||||
(attr->name[2] == 0)) return(1);
|
||||
*******************/
|
||||
} else if (doc->type == XML_HTML_DOCUMENT_NODE) {
|
||||
/* TODO @@@ */
|
||||
return(0);
|
||||
} else {
|
||||
xmlAttributePtr attrDecl;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user