1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-03-06 20:58:17 +03:00

work done on auto-opening of <p> tags and cleanup of SAX output, Daniel.

This commit is contained in:
Daniel Veillard 2000-08-19 19:52:36 +00:00
parent 979e55e889
commit b8f25c9118
23 changed files with 373 additions and 40 deletions

View File

@ -1,3 +1,8 @@
Sat Aug 19 21:02:08 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* HTMLparser.c SAX.c tree.c HTMLtree.h result/HTML/*: work
done on auto-opening of <p> tags and cleanup of SAX output
Sat Aug 19 18:45:40 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* libxml.4 xmllint.1 Makefile.am libxml.spec.in: added man pages

View File

@ -552,6 +552,20 @@ char *htmlStartClose[] = {
NULL
};
/*
 * NULL-terminated table of the HTML elements that are not supposed to
 * hold character data directly: when text shows up while one of them is
 * the current element, a p element is implied first.
 *
 * TODO: extend that list by reading the HTML SGML DTD on
 *       implied paragraph
 */
static char *htmlNoContentElements[] = { "html", "head", "body", NULL };
static char** htmlStartCloseIndex[100];
static int htmlStartCloseIndexinitialized = 0;
@ -845,6 +859,49 @@ htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
}
}
/**
 * htmlCheckParagraph:
 * @ctxt:  an HTML parser context
 *
 * Decide whether a p element has to be implied before character data is
 * delivered for the current element, and open it if so. A paragraph is
 * implied when there is no current element at all, or when the current
 * element is listed in htmlNoContentElements.
 *
 * Returns 1 if a paragraph has been inserted, 0 if not and -1
 *         in case of error (NULL context).
 */
int
htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
    const xmlChar *current;
    int needParagraph = 0;
    int idx;

    if (ctxt == NULL)
        return(-1);

    /* Sample the current element name before anything alters the stack. */
    current = ctxt->name;
    if (current == NULL) {
        needParagraph = 1;
    } else {
        idx = 0;
        while (htmlNoContentElements[idx] != NULL) {
            if (xmlStrcmp(current, BAD_CAST htmlNoContentElements[idx]) == 0) {
#ifdef DEBUG
                fprintf(stderr,"Implied element paragraph\n");
#endif
                needParagraph = 1;
                break;
            }
            idx++;
        }
    }

    if (!needParagraph)
        return(0);

    /*
     * Open the implied paragraph: close what a <p> would close, add any
     * implied ancestors, push a duplicated copy of the name on the element
     * stack, then report the start tag to the SAX handler if one is set.
     */
    htmlAutoClose(ctxt, BAD_CAST"p");
    htmlCheckImplied(ctxt, BAD_CAST"p");
    htmlnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
        ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
    return(1);
}
/************************************************************************
* *
* The list of HTML predefined entities *
@ -1253,7 +1310,8 @@ UTF8ToHtml(unsigned char* out, int *outlen,
sizeof(html40EntitiesTable[0]));i++) {
if (html40EntitiesTable[i].value == c) {
#ifdef DEBUG
fprintf(stderr,"Found entity %s\n", name);
fprintf(stderr,"Found entity %s\n",
html40EntitiesTable[i].name);
#endif
goto found_ent;
}
@ -1496,20 +1554,21 @@ htmlHandleEntity(htmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
/*
* Just handle the content as a set of chars.
*/
htmlCheckParagraph(ctxt);
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
ctxt->sax->characters(ctxt->userData, entity->content, len);
}
/**
* htmlNewDoc:
* htmlNewDocNoDtD:
* @URI: URI for the dtd, or NULL
* @ExternalID: the external ID of the DTD, or NULL
*
* Returns a new document
* Returns a new document; the DTD is not initialized if none is provided
*/
htmlDocPtr
htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) {
xmlDocPtr cur;
/*
@ -1525,12 +1584,8 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
cur->type = XML_HTML_DOCUMENT_NODE;
cur->version = NULL;
cur->intSubset = NULL;
if ((ExternalID == NULL) &&
(URI == NULL))
xmlCreateIntSubset(cur, BAD_CAST "HTML",
BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
else
if ((ExternalID != NULL) ||
(URI != NULL))
xmlCreateIntSubset(cur, BAD_CAST "HTML", ExternalID, URI);
cur->doc = cur;
cur->name = NULL;
@ -1548,6 +1603,23 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
return(cur);
}
/**
 * htmlNewDoc:
 * @URI:  URI for the dtd, or NULL
 * @ExternalID:  the external ID of the DTD, or NULL
 *
 * Creates a new HTML document through htmlNewDocNoDtD(). When both @URI
 * and @ExternalID are NULL the HTML 4.0 Transitional identifiers are
 * supplied as defaults; otherwise the arguments are forwarded unchanged.
 *
 * Returns a new document
 */
htmlDocPtr
htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
    if ((URI == NULL) && (ExternalID == NULL)) {
        /*
         * htmlNewDocNoDtD() takes (URI, ExternalID) in that order, so the
         * system identifier (the DTD URL) goes first and the public
         * identifier second; passing them the other way round swaps the
         * PUBLIC and SYSTEM ids of the generated DOCTYPE.
         */
        return(htmlNewDocNoDtD(
                    BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd",
                    BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN"));
    }

    return(htmlNewDocNoDtD(URI, ExternalID));
}
/************************************************************************
* *
@ -2062,6 +2134,7 @@ htmlParseCharData(htmlParserCtxtPtr ctxt, int cdata) {
ctxt->sax->ignorableWhitespace(ctxt->userData,
buf, nbchar);
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
ctxt->sax->characters(ctxt->userData, buf, nbchar);
}
@ -2080,6 +2153,7 @@ htmlParseCharData(htmlParserCtxtPtr ctxt, int cdata) {
if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
ctxt->sax->characters(ctxt->userData, buf, nbchar);
}
@ -2861,16 +2935,19 @@ htmlParseReference(htmlParserCtxtPtr ctxt) {
}
out[i] = 0;
htmlCheckParagraph(ctxt);
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
ctxt->sax->characters(ctxt->userData, out, i);
} else {
ent = htmlParseEntityRef(ctxt, &name);
if (name == NULL) {
htmlCheckParagraph(ctxt);
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
return;
}
if ((ent == NULL) || (ent->value <= 0)) {
htmlCheckParagraph(ctxt);
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
@ -2895,6 +2972,7 @@ htmlParseReference(htmlParserCtxtPtr ctxt) {
}
out[i] = 0;
htmlCheckParagraph(ctxt);
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
ctxt->sax->characters(ctxt->userData, out, i);
}
@ -2941,6 +3019,21 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
return;
}
/*
* Sometimes DOCTYPE arrives in the middle of the document
*/
if ((CUR == '<') && (NXT(1) == '!') &&
(UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"Misplaced DOCTYPE declaration\n");
ctxt->wellFormed = 0;
htmlParseDocTypeDecl(ctxt);
}
/*
* First case : a comment
*/
@ -3185,6 +3278,8 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
int
htmlParseDocument(htmlParserCtxtPtr ctxt) {
xmlDtdPtr dtd;
htmlDefaultSAXHandlerInit();
ctxt->html = 1;
@ -3258,6 +3353,15 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
*/
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
ctxt->sax->endDocument(ctxt->userData);
if (ctxt->myDoc != NULL) {
dtd = xmlGetIntSubset(ctxt->myDoc);
if (dtd == NULL)
ctxt->myDoc->intSubset =
xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "HTML",
BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
}
if (! ctxt->wellFormed) return(-1);
return(0);
}
@ -3848,6 +3952,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
xmlChar chr[2] = { 0 , 0 } ;
chr[0] = (xmlChar) ctxt->token;
htmlCheckParagraph(ctxt);
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
ctxt->sax->characters(ctxt->userData, chr, 1);
ctxt->token = 0;
@ -3862,6 +3967,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
ctxt->sax->ignorableWhitespace(
ctxt->userData, &cur, 1);
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
ctxt->sax->characters(
ctxt->userData, &cur, 1);
@ -3878,7 +3984,23 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
cur = in->cur[0];
next = in->cur[1];
cons = ctxt->nbChars;
if ((cur == '<') && (next == '!') &&
/*
* Sometimes DOCTYPE arrives in the middle of the document
*/
if ((cur == '<') && (next == '!') &&
(UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
goto done;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"Misplaced DOCTYPE declaration\n");
ctxt->wellFormed = 0;
htmlParseDocTypeDecl(ctxt);
} else if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
@ -4040,6 +4162,17 @@ done:
ctxt->sax->endDocument(ctxt->userData);
}
}
if ((ctxt->myDoc != NULL) &&
((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
(ctxt->instate == XML_PARSER_EPILOG))) {
xmlDtdPtr dtd;
dtd = xmlGetIntSubset(ctxt->myDoc);
if (dtd == NULL)
ctxt->myDoc->intSubset =
xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "HTML",
BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
}
#ifdef DEBUG_PUSH
fprintf(stderr, "HPP: done %d\n", ret);
#endif

View File

@ -25,6 +25,8 @@ extern "C" {
htmlDocPtr htmlNewDoc (const xmlChar *URI,
const xmlChar *ExternalID);
htmlDocPtr htmlNewDocNoDtD (const xmlChar *URI,
const xmlChar *ExternalID);
const xmlChar * htmlGetMetaEncoding (htmlDocPtr doc);
int htmlSetMetaEncoding (htmlDocPtr doc,
const xmlChar *encoding);

4
SAX.c
View File

@ -169,6 +169,8 @@ internalSubset(void *ctx, const xmlChar *name,
return;
dtd = xmlGetIntSubset(ctxt->myDoc);
if (dtd != NULL) {
if (ctxt->html)
return;
xmlUnlinkNode((xmlNodePtr) dtd);
xmlFreeDtd(dtd);
ctxt->myDoc->intSubset = NULL;
@ -605,7 +607,7 @@ startDocument(void *ctx)
if (ctxt->html) {
if (ctxt->myDoc == NULL)
#ifdef LIBXML_HTML_ENABLED
ctxt->myDoc = htmlNewDoc(NULL, NULL);
ctxt->myDoc = htmlNewDocNoDtD(NULL, NULL);
#else
fprintf(stderr, "libxml2 built without HTML support\n");
#endif

View File

@ -25,6 +25,8 @@ extern "C" {
htmlDocPtr htmlNewDoc (const xmlChar *URI,
const xmlChar *ExternalID);
htmlDocPtr htmlNewDocNoDtD (const xmlChar *URI,
const xmlChar *ExternalID);
const xmlChar * htmlGetMetaEncoding (htmlDocPtr doc);
int htmlSetMetaEncoding (htmlDocPtr doc,
const xmlChar *encoding);

View File

@ -3,8 +3,10 @@
<head><title>This service is temporary down</title></head>
<body bgcolor="#FFFFFF">
<h1 align="center">Sorry, this service is temporary down</h1>
<p>
We are doing our best to get it back on-line,
</p>
<p>The W3C system administrators</p>
</body>
</html>

View File

@ -1,36 +1,53 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(head)
SAX.endElement(head)
SAX.startElement(p)
SAX.characters(
, 3)
SAX.endElement(p)
SAX.startElement(title)
SAX.characters(This service is temporary down, 30)
SAX.endElement(title)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(head)
SAX.error: Unexpected end tag : head
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(body, bgcolor='#FFFFFF')
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(h1, align='center')
SAX.characters(Sorry, this service is tempora, 37)
SAX.endElement(h1)
SAX.startElement(p)
SAX.characters(
We are doing our best to get , 48)
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(The W3C system administrators, 29)
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
, 1)

View File

@ -8,7 +8,7 @@
function NS_NewOpen(url,nam,atr){return(new NS_NullWindow());}
window.open=NS_NewOpen;
</script>
<!-- END Naviscope Javascript -->!DOCTYPE HTML PUBLIC &quot;-//W3C//DTD HTML 4.0 Transitional//EN&quot;&gt;<!-- saved from url=(0027)http://www.agents-tech.com/ --><meta content="text/html; charset=iso-8859-1" http-equiv="Content-Type">
<!-- END Naviscope Javascript --><!-- saved from url=(0027)http://www.agents-tech.com/ --><meta content="text/html; charset=iso-8859-1" http-equiv="Content-Type">
<meta content="Copernic.com Inc. develops innovative agent technology solutions to efficiently access and manage the overwhelming quantity of information available on the Internet and intranets." name="DESCRIPTION">
<meta content="agent,technology,intranet,extranet,management,filtering,ranking,solution,service,intelligent,intelligence,client,server,architecture,developer,development,information,telecommunication,announcement,press,product,profile,contact,multi-agent,meta-search,metasearch,multi-thread,mobile,wireless,shopping,robot,PCS,Copernic,engine,toolkit,CDK,EDK" name="KEYWORDS">
<meta content="MSHTML 5.00.3103.1000" name="GENERATOR">

View File

@ -1,3 +1,3 @@
./test/HTML/doc2.htm:10: error: htmlParseStartTag: invalid element name
./test/HTML/doc2.htm:10: error: Misplaced DOCTYPE declaration
<!-- END Naviscope Javascript --><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Tr
^
^

View File

@ -8,6 +8,9 @@ SAX.startElement(head)
SAX.startElement(title)
SAX.characters(Welcome to Copernic.com, 23)
SAX.endElement(title)
SAX.endElement(head)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.startElement(script, language='javascript')
@ -17,8 +20,8 @@ SAX.endElement(script)
SAX.characters(
, 1)
SAX.comment( END Naviscope Javascript )
SAX.error: htmlParseStartTag: invalid element name
SAX.characters(!DOCTYPE HTML PUBLIC "-//W3C//, 61)
SAX.error: Misplaced DOCTYPE declaration
SAX.internalSubset(HTML, -//W3C//DTD HTML 4.0 Transitional//EN, )
SAX.comment( saved from url=(0027)http://www.agents-tech.com/ )
SAX.characters(
, 1)
@ -36,8 +39,7 @@ SAX.characters(
, 1)
SAX.startElement(meta, content='MSHTML 5.00.3103.1000', name='GENERATOR')
SAX.endElement(meta)
SAX.endElement(head)
SAX.startElement(body)
SAX.error: Unexpected end tag : head
SAX.startElement(frameset, border='false', cols='172,*', frameBorder='0', frameSpacing='0')
SAX.startElement(frame, marginHeight='0', marginWidth='0', name='left', noResize, scrolling='no', src='doc2_files/side.htm', target='rtop')
SAX.endElement(frame)
@ -52,18 +54,23 @@ SAX.characters(
, 4)
SAX.startElement(body, bgcolor='#FFFFFF', text='#000000', link='#000080', vlink='#000080', alink='#000080', topmargin='0', leftmargin='0', marginheight='0', marginwidth='0')
SAX.startElement(p)
SAX.characters(
, 3)
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(This page uses frames, but you, 61)
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
, 3)
SAX.endElement(p)
SAX.endElement(body)
SAX.characters(
, 3)
SAX.endElement(noframes)
SAX.endElement(frameset)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(

View File

@ -8,7 +8,7 @@
function NS_NewOpen(url,nam,atr){return(new NS_NullWindow());}
window.open=NS_NewOpen;
</script>
<!-- END Naviscope Javascript -->!DOCTYPE HTML PUBLIC &quot;-//W3C//DTD HTML 3.2//EN&quot;&gt;<!--last modified on Tuesday, February 22, 2000 11:47 PM --><meta content="text/html;CHARSET=iso-8859-1" http-equiv="Content-Type">
<!-- END Naviscope Javascript --><!--last modified on Tuesday, February 22, 2000 11:47 PM --><meta content="text/html;CHARSET=iso-8859-1" http-equiv="Content-Type">
<meta content="Tim" name="Author">
<style type="text/css">A.nav {
COLOR: #003399; TEXT-DECORATION: none

View File

@ -1,6 +1,6 @@
./test/HTML/doc3.htm:10: error: htmlParseStartTag: invalid element name
./test/HTML/doc3.htm:10: error: Misplaced DOCTYPE declaration
<!-- END Naviscope Javascript --><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//E
^
^
./test/HTML/doc3.htm:52: error: htmlParseEntityRef: expecting ';'
href="http://ads.gamesquad.net/addclick.exe/adclick.cgi?REGION=game|tech|ent&i
^

View File

@ -8,6 +8,9 @@ SAX.startElement(head)
SAX.startElement(title)
SAX.characters(BP6.com #1 online resource for, 47)
SAX.endElement(title)
SAX.endElement(head)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.startElement(script, language='javascript')
@ -17,8 +20,8 @@ SAX.endElement(script)
SAX.characters(
, 2)
SAX.comment( END Naviscope Javascript )
SAX.error: htmlParseStartTag: invalid element name
SAX.characters(!DOCTYPE HTML PUBLIC "-//W3C//, 48)
SAX.error: Misplaced DOCTYPE declaration
SAX.internalSubset(HTML, -//W3C//DTD HTML 3.2//EN, )
SAX.comment(last modified on Tuesday, February 22, 2000 11:47 PM )
SAX.characters(
, 2)
@ -69,12 +72,15 @@ SAX.characters(
, 4)
SAX.startElement(meta, content='MSHTML 5.00.3103.1000', name='GENERATOR')
SAX.endElement(meta)
SAX.endElement(head)
SAX.error: Unexpected end tag : head
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(body, aLink='red', bgColor='black', link='red', text='white', vLink='red')
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
, 2)
@ -2913,6 +2919,7 @@ SAX.comment( <TR> <TD WIDTH="780"> <P ALIGN="CENTER"><FONT SIZE="1" COLOR="#9
SAX.endElement(tbody)
SAX.endElement(table)
SAX.endElement(div)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.startElement(script)
@ -2920,6 +2927,8 @@ SAX.characters( window.open=NS_ActualOpen; , 28)
SAX.endElement(script)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(

View File

@ -2,30 +2,39 @@ SAX.setDocumentLocator()
SAX.startDocument()
SAX.internalSubset(html, -//IETF//DTD HTML//EN, )
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(head)
SAX.endElement(head)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.startElement(meta, name='GENERATOR', content='Microsoft FrontPage 4.0')
SAX.endElement(meta)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(title)
SAX.characters(README - Microsoft FrontPage 2, 51)
SAX.endElement(title)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.startElement(meta, name='Microsoft Theme', content='none')
SAX.endElement(meta)
SAX.characters(
, 1)
SAX.endElement(head)
SAX.error: Unexpected end tag : head
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.startElement(font, face='Verdana')
@ -169,6 +178,7 @@ SAX.characters(
SAX.endElement(font)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(blockquote)
SAX.characters(
, 3)
@ -184,6 +194,7 @@ SAX.endElement(font)
SAX.characters(
, 1)
SAX.endElement(blockquote)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.startElement(font, face='Verdana')
@ -454,9 +465,13 @@ SAX.characters(
SAX.endElement(font)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
, 1)

View File

@ -2,21 +2,31 @@ SAX.setDocumentLocator()
SAX.startDocument()
SAX.internalSubset(HTML, -//W3C//DTD HTML 4.0 Transitional//EN, http://www.w3.org/TR/REC-html40/loose.dtd)
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(head)
SAX.endElement(head)
SAX.startElement(p)
SAX.characters(
, 3)
SAX.endElement(p)
SAX.startElement(title)
SAX.endElement(title)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(head)
SAX.error: Unexpected end tag : head
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(ul)
SAX.characters(
, 1)
@ -28,9 +38,12 @@ SAX.startElement(li)
SAX.characters(Second item, closes the first , 34)
SAX.endElement(li)
SAX.endElement(ul)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(

View File

@ -1,35 +1,50 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(head)
SAX.endElement(head)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(title)
SAX.characters(Regression test 1, 17)
SAX.endElement(title)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(head)
SAX.error: Unexpected end tag : head
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(h1)
SAX.characters(Regression test 1, 17)
SAX.endElement(h1)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
Ok file no problem
, 20)
SAX.endElement(p)
SAX.endElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
, 1)

View File

@ -1,27 +1,39 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(head)
SAX.endElement(head)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(title)
SAX.characters(Regression test 2, 17)
SAX.endElement(title)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(head)
SAX.error: Unexpected end tag : head
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(h1)
SAX.characters(Regression test 2, 17)
SAX.endElement(h1)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
Autoclose of tag P
@ -33,8 +45,11 @@ Ok file no problem
, 20)
SAX.endElement(p)
SAX.endElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
, 1)

View File

@ -1,27 +1,39 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(head)
SAX.endElement(head)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(title)
SAX.characters(Regression test 3, 17)
SAX.endElement(title)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(head)
SAX.error: Unexpected end tag : head
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(h1)
SAX.characters(Regression test 3, 17)
SAX.endElement(h1)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
Autoclose of tag P
@ -29,16 +41,21 @@ Autoclose of tag P
SAX.endElement(p)
SAX.startElement(hr)
SAX.endElement(hr)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
Ok file no problem
, 20)
SAX.endElement(p)
SAX.endElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
, 1)

View File

@ -1,27 +1,39 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(head)
SAX.endElement(head)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(title)
SAX.characters(Regression test 4, 17)
SAX.endElement(title)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(head)
SAX.error: Unexpected end tag : head
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(h1)
SAX.characters(Regression test 4, 17)
SAX.endElement(h1)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
Wrong close of tag P
@ -29,14 +41,20 @@ Wrong close of tag P
SAX.endElement(p)
SAX.startElement(hr)
SAX.endElement(hr)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.error: Unexpected end tag : p
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
, 1)

View File

@ -3,17 +3,25 @@ SAX.startDocument()
SAX.internalSubset(HTML, -//W3C//DTD HTML 4.0 Transitional//EN, http://www.w3.org/TR/REC-html40/loose.dtd)
SAX.startElement(html)
SAX.startElement(head)
SAX.endElement(head)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters( , 1)
SAX.endElement(p)
SAX.startElement(title)
SAX.characters(Linux Today, 11)
SAX.endElement(title)
SAX.endElement(head)
SAX.error: Unexpected end tag : head
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(body, bgcolor='White', link='Blue', text='Black', VLINK='Black', ALINK='Red')
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(center)
SAX.characters(
, 1)
@ -130,15 +138,20 @@ SAX.endElement(font)
SAX.characters(
, 1)
SAX.endElement(center)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
, 1)

View File

@ -1,19 +1,23 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(head)
SAX.endElement(head)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.startElement(base, target='contents')
SAX.endElement(base)
SAX.characters(
, 2)
SAX.endElement(head)
SAX.error: Unexpected end tag : head
SAX.characters(
, 2)
SAX.startElement(body)
SAX.startElement(a, name='ProblemDomain.Package')
SAX.startElement(h2)
SAX.characters(Component Package diagram Prob, 39)
@ -21,13 +25,16 @@ SAX.endElement(h2)
SAX.characters(
, 2)
SAX.endElement(a)
SAX.endElement(p)
SAX.startElement(p)
SAX.endElement(p)
SAX.startElement(hr)
SAX.endElement(hr)
SAX.error: Unexpected end tag : p
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(dl)
SAX.characters(
, 2)
@ -59,15 +66,19 @@ SAX.characters(Interface, thats stores and ma, 58)
SAX.endElement(dd)
SAX.endElement(dd)
SAX.endElement(dl)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(p)
SAX.endElement(p)
SAX.startElement(hr)
SAX.endElement(hr)
SAX.error: Unexpected end tag : p
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(dl)
SAX.characters(
@ -163,9 +174,11 @@ SAX.endElement(dt)
SAX.characters(
, 2)
SAX.endElement(dl)
SAX.startElement(p)
SAX.characters(
, 4)
SAX.endElement(p)
SAX.startElement(h4)
SAX.startElement(b)
SAX.characters(Links, 5)
@ -173,8 +186,10 @@ SAX.error: Opening and ending tag mismatch: h4 and b
SAX.endElement(b)
SAX.endElement(h4)
SAX.error: Unexpected end tag : b
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(ul)
SAX.startElement(li)
SAX.startElement(b)
@ -185,12 +200,16 @@ SAX.characters(HumanInterface, 14)
SAX.endElement(a)
SAX.endElement(li)
SAX.endElement(ul)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(dir)
SAX.endElement(dir)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(ul)
SAX.startElement(li)
SAX.startElement(b)
@ -201,12 +220,16 @@ SAX.characters(DataManagement.FlatFile, 23)
SAX.endElement(a)
SAX.endElement(li)
SAX.endElement(ul)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(dir)
SAX.endElement(dir)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(ul)
SAX.startElement(li)
SAX.startElement(b)
@ -217,12 +240,16 @@ SAX.characters(DataManagement, 14)
SAX.endElement(a)
SAX.endElement(li)
SAX.endElement(ul)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(dir)
SAX.endElement(dir)
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(

View File

@ -8,9 +8,11 @@ SAX.characters(Top Stories News from Wired Ne, 32)
SAX.endElement(title)
SAX.endElement(head)
SAX.startElement(body, bgcolor='#FFFFFF', text='#000000', link='#333399', vlink='#660066', alink='#666699')
SAX.startElement(p)
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(table, border='0', width='600', cellspacing='0', cellpadding='0')
SAX.characters(
, 3)
@ -315,6 +317,7 @@ SAX.endElement(tr)
SAX.characters(
, 1)
SAX.endElement(table)
SAX.startElement(p)
SAX.characters(
, 2)
@ -329,6 +332,7 @@ SAX.startElement(a, name='#')
SAX.endElement(a)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(table, border='0', width='600', cellspacing='0', cellpadding='0')
SAX.characters(
@ -574,6 +578,7 @@ SAX.comment(
SAX.characters(
, 1)
SAX.endElement(table)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.comment( end WIRED NEWS header )
@ -584,6 +589,7 @@ SAX.comment( begin upper left side Navigation )
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(table, border='0', cellpadding='3', cellspacing='0', align='LEFT', bgcolor='#FFFFFF')
SAX.characters(
, 3)
@ -1465,6 +1471,7 @@ SAX.characters(
, 2)
SAX.endElement(table)
SAX.startElement(p)
SAX.characters(
, 2)
@ -1475,6 +1482,7 @@ SAX.comment( CONTENT TABLE )
SAX.characters(
, 2)
SAX.endElement(p)
SAX.startElement(table, border='0', width='447', cellspacing='0', cellpadding='0', bordercolor='#66FF00')
SAX.characters(
, 2)
@ -2832,6 +2840,7 @@ SAX.endElement(tr)
SAX.characters(
, 1)
SAX.endElement(table)
SAX.startElement(p)
SAX.characters(
@ -2840,9 +2849,14 @@ SAX.startElement(br)
SAX.endElement(br)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(

15
tree.c
View File

@ -426,10 +426,17 @@ xmlCreateIntSubset(xmlDocPtr doc, const xmlChar *name,
} else {
xmlNodePtr prev;
prev = doc->last;
prev->next = (xmlNodePtr) cur;
cur->prev = prev;
doc->last = (xmlNodePtr) cur;
if (doc->type == XML_HTML_DOCUMENT_NODE) {
prev = doc->children;
prev->prev = (xmlNodePtr) cur;
cur->next = prev;
doc->children = (xmlNodePtr) cur;
} else {
prev = doc->last;
prev->next = (xmlNodePtr) cur;
cur->prev = prev;
doc->last = (xmlNodePtr) cur;
}
}
}
return(cur);