1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-01-12 09:17:37 +03:00

Applied patch from James Bursa fixing an HTML parsing bug in push mode

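* HTMLparser.c: applied patch from James Bursa fixing an HTML parsing
  bug in push mode: htmlParseStartTag() now returns 0 on success and -1
  on error, and its callers test that result instead of comparing the
  element stack depth and name before and after the call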
* result/HTML/repeat.html* test/HTML/repeat.html: added the test to the
  regression suite
Daniel
This commit is contained in:
Daniel Veillard 2005-07-03 23:00:18 +00:00
parent 39e5c89016
commit 597f1c1f34
6 changed files with 47 additions and 22 deletions

View File

@ -1,3 +1,10 @@
Mon Jul 4 00:58:44 CEST 2005 Daniel Veillard <daniel@veillard.com>
* HTMLparser.c: applied patch from James Bursa fixing an html parsing
bug in push mode
* result/HTML/repeat.html* test/HTML/repeat.html: added the test to the
regression suite
Sun Jul 3 23:42:31 CEST 2005 Daniel Veillard <daniel@veillard.com>
* testapi.c tree.c: fixing a leak detected by testapi in

View File

@ -3349,9 +3349,10 @@ htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
*
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
*
* Returns 0 in case of success and -1 in case of error.
*/
static void
static int
htmlParseStartTag(htmlParserCtxtPtr ctxt) {
const xmlChar *name;
const xmlChar *attname;
@ -3365,9 +3366,9 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
if ((ctxt == NULL) || (ctxt->input == NULL)) {
htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
"htmlParseStartTag: context error\n", NULL, NULL);
return;
return -1;
}
if (CUR != '<') return;
if (CUR != '<') return -1;
NEXT;
GROW;
@ -3379,7 +3380,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
/* Dump the bogus tag like browsers do */
while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
NEXT;
return;
return -1;
}
if (xmlStrEqual(name, BAD_CAST"meta"))
meta = 1;
@ -3402,14 +3403,14 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"htmlParseStartTag: misplaced <html> tag\n",
name, NULL);
return;
return 0;
}
if ((ctxt->nameNr != 1) &&
(xmlStrEqual(name, BAD_CAST"head"))) {
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"htmlParseStartTag: misplaced <head> tag\n",
name, NULL);
return;
return 0;
}
if (xmlStrEqual(name, BAD_CAST"body")) {
int indx;
@ -3420,7 +3421,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
name, NULL);
while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
NEXT;
return;
return 0;
}
}
}
@ -3533,6 +3534,8 @@ failed:
xmlFree((xmlChar *) atts[i]);
}
}
return 0;
}
/**
@ -3847,16 +3850,15 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
xmlChar *currentNode = NULL;
const htmlElemDesc * info;
htmlParserNodeInfo node_info;
const xmlChar *oldname;
int failed;
int depth;
const xmlChar *oldptr;
if ((ctxt == NULL) || (ctxt->input == NULL)) {
htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
"htmlParseStartTag: context error\n", NULL, NULL);
"htmlParseElement: context error\n", NULL, NULL);
return;
}
depth = ctxt->nameNr;
/* Capture start position */
if (ctxt->record_info) {
node_info.begin_pos = ctxt->input->consumed +
@ -3864,11 +3866,9 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
node_info.begin_line = ctxt->input->line;
}
oldname = ctxt->name;
htmlParseStartTag(ctxt);
failed = htmlParseStartTag(ctxt);
name = ctxt->name;
if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) ||
(name == NULL)) {
if (failed || (name == NULL)) {
if (CUR == '>')
NEXT;
return;
@ -4577,11 +4577,11 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
#endif
} else {
ctxt->instate = XML_PARSER_MISC;
}
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"HPP: entering MISC\n");
xmlGenericError(xmlGenericErrorContext,
"HPP: entering MISC\n");
#endif
}
break;
case XML_PARSER_MISC:
SKIP_BLANKS;
@ -4739,7 +4739,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
break;
case XML_PARSER_START_TAG: {
const xmlChar *name, *oldname;
int depth = ctxt->nameNr;
int failed;
const htmlElemDesc * info;
if (avail < 2)
@ -4766,11 +4766,9 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
oldname = ctxt->name;
htmlParseStartTag(ctxt);
failed = htmlParseStartTag(ctxt);
name = ctxt->name;
if (((depth == ctxt->nameNr) &&
(xmlStrEqual(oldname, ctxt->name))) ||
if (failed ||
(name == NULL)) {
if (CUR == '>')
NEXT;

5
result/HTML/repeat.html Normal file
View File

@ -0,0 +1,5 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html><body>
<td></td>
<td><!-- <a><b> --></td>
</body></html>

View File

View File

@ -0,0 +1,14 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(td)
SAX.endElement(td)
SAX.startElement(td)
SAX.comment( <a><b> )
SAX.ignorableWhitespace(
, 1)
SAX.endElement(td)
SAX.endElement(body)
SAX.endElement(html)
SAX.endDocument()

1
test/HTML/repeat.html Normal file
View File

@ -0,0 +1 @@
<td><td><!-- <a><b> -->