mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-04-24 18:50:07 +03:00
html: Stop reporting syntax errors
It doesn't make much sense to keep the old syntax error handling, which doesn't conform to HTML5. Handling HTML5 parser errors is rather involved and not essential for parsers.
This commit is contained in:
parent
c6af101728
commit
e179f3ec0e
167
HTMLparser.c
167
HTMLparser.c
@ -2958,28 +2958,18 @@ htmlParseAttValue(htmlParserCtxtPtr ctxt) {
|
|||||||
if (CUR == '"') {
|
if (CUR == '"') {
|
||||||
SKIP(1);
|
SKIP(1);
|
||||||
ret = htmlParseHTMLAttribute(ctxt, '"');
|
ret = htmlParseHTMLAttribute(ctxt, '"');
|
||||||
if (CUR != '"') {
|
if (CUR == '"')
|
||||||
htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
|
|
||||||
"AttValue: \" expected\n", NULL, NULL);
|
|
||||||
} else
|
|
||||||
SKIP(1);
|
SKIP(1);
|
||||||
} else if (CUR == '\'') {
|
} else if (CUR == '\'') {
|
||||||
SKIP(1);
|
SKIP(1);
|
||||||
ret = htmlParseHTMLAttribute(ctxt, '\'');
|
ret = htmlParseHTMLAttribute(ctxt, '\'');
|
||||||
if (CUR != '\'') {
|
if (CUR == '\'')
|
||||||
htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
|
|
||||||
"AttValue: ' expected\n", NULL, NULL);
|
|
||||||
} else
|
|
||||||
SKIP(1);
|
SKIP(1);
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* That's an HTMLism, the attribute value may not be quoted
|
* That's an HTMLism, the attribute value may not be quoted
|
||||||
*/
|
*/
|
||||||
ret = htmlParseHTMLAttribute(ctxt, 0);
|
ret = htmlParseHTMLAttribute(ctxt, 0);
|
||||||
if (ret == NULL) {
|
|
||||||
htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
|
|
||||||
"AttValue: no value found\n", NULL, NULL);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return(ret);
|
return(ret);
|
||||||
}
|
}
|
||||||
@ -3561,11 +3551,8 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
|
|||||||
|
|
||||||
*value = NULL;
|
*value = NULL;
|
||||||
name = htmlParseHTMLName(ctxt, 1);
|
name = htmlParseHTMLName(ctxt, 1);
|
||||||
if (name == NULL) {
|
if (name == NULL)
|
||||||
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
|
|
||||||
"error parsing attribute name\n", NULL, NULL);
|
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* read the value
|
* read the value
|
||||||
@ -3702,55 +3689,53 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
|||||||
|
|
||||||
GROW;
|
GROW;
|
||||||
name = htmlParseHTMLName(ctxt, 0);
|
name = htmlParseHTMLName(ctxt, 0);
|
||||||
if (name == NULL) {
|
if (name == NULL)
|
||||||
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
|
|
||||||
"htmlParseStartTag: invalid element name\n",
|
|
||||||
NULL, NULL);
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
|
||||||
if (xmlStrEqual(name, BAD_CAST"meta"))
|
if (xmlStrEqual(name, BAD_CAST"meta"))
|
||||||
meta = 1;
|
meta = 1;
|
||||||
|
|
||||||
/*
|
if ((ctxt->options & HTML_PARSE_HTML5) == 0) {
|
||||||
* Check for auto-closure of HTML elements.
|
/*
|
||||||
*/
|
* Check for auto-closure of HTML elements.
|
||||||
htmlAutoClose(ctxt, name);
|
*/
|
||||||
|
htmlAutoClose(ctxt, name);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for implied HTML elements.
|
* Check for implied HTML elements.
|
||||||
*/
|
*/
|
||||||
htmlCheckImplied(ctxt, name);
|
htmlCheckImplied(ctxt, name);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Avoid html at any level > 0, head at any level != 1
|
* Avoid html at any level > 0, head at any level != 1
|
||||||
* or any attempt to recurse body
|
* or any attempt to recurse body
|
||||||
*/
|
*/
|
||||||
if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
|
if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
|
||||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||||
"htmlParseStartTag: misplaced <html> tag\n",
|
"htmlParseStartTag: misplaced <html> tag\n",
|
||||||
name, NULL);
|
name, NULL);
|
||||||
discardtag = 1;
|
discardtag = 1;
|
||||||
ctxt->depth++;
|
ctxt->depth++;
|
||||||
}
|
}
|
||||||
if ((ctxt->nameNr != 1) &&
|
if ((ctxt->nameNr != 1) &&
|
||||||
(xmlStrEqual(name, BAD_CAST"head"))) {
|
(xmlStrEqual(name, BAD_CAST"head"))) {
|
||||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||||
"htmlParseStartTag: misplaced <head> tag\n",
|
"htmlParseStartTag: misplaced <head> tag\n",
|
||||||
name, NULL);
|
name, NULL);
|
||||||
discardtag = 1;
|
discardtag = 1;
|
||||||
ctxt->depth++;
|
ctxt->depth++;
|
||||||
}
|
}
|
||||||
if (xmlStrEqual(name, BAD_CAST"body")) {
|
if (xmlStrEqual(name, BAD_CAST"body")) {
|
||||||
int indx;
|
int indx;
|
||||||
for (indx = 0;indx < ctxt->nameNr;indx++) {
|
for (indx = 0;indx < ctxt->nameNr;indx++) {
|
||||||
if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
|
if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
|
||||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||||
"htmlParseStartTag: misplaced <body> tag\n",
|
"htmlParseStartTag: misplaced <body> tag\n",
|
||||||
name, NULL);
|
name, NULL);
|
||||||
discardtag = 1;
|
discardtag = 1;
|
||||||
ctxt->depth++;
|
ctxt->depth++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -3778,8 +3763,6 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
|||||||
*/
|
*/
|
||||||
for (i = 0; i < nbatts;i += 2) {
|
for (i = 0; i < nbatts;i += 2) {
|
||||||
if (xmlStrEqual(atts[i], attname)) {
|
if (xmlStrEqual(atts[i], attname)) {
|
||||||
htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED,
|
|
||||||
"Attribute %s redefined\n", attname, NULL);
|
|
||||||
if (attvalue != NULL)
|
if (attvalue != NULL)
|
||||||
xmlFree(attvalue);
|
xmlFree(attvalue);
|
||||||
goto failed;
|
goto failed;
|
||||||
@ -3894,8 +3877,6 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
|||||||
int i, ret;
|
int i, ret;
|
||||||
|
|
||||||
if ((CUR != '<') || (NXT(1) != '/')) {
|
if ((CUR != '<') || (NXT(1) != '/')) {
|
||||||
htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,
|
|
||||||
"htmlParseEndTag: '</' not found\n", NULL, NULL);
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
SKIP(2);
|
SKIP(2);
|
||||||
@ -4177,12 +4158,8 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
|
|||||||
* Lookup the info for that element.
|
* Lookup the info for that element.
|
||||||
*/
|
*/
|
||||||
info = htmlTagLookup(name);
|
info = htmlTagLookup(name);
|
||||||
if (info == NULL) {
|
if (info != NULL)
|
||||||
htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
|
|
||||||
"Tag %s invalid\n", name, NULL);
|
|
||||||
} else {
|
|
||||||
ctxt->endCheckState = info->dataMode;
|
ctxt->endCheckState = info->dataMode;
|
||||||
}
|
|
||||||
|
|
||||||
if (ctxt->record_info)
|
if (ctxt->record_info)
|
||||||
htmlNodeInfoPush(ctxt, &node_info);
|
htmlNodeInfoPush(ctxt, &node_info);
|
||||||
@ -4201,22 +4178,9 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
|
|||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (CUR == '>') {
|
if (CUR != '>')
|
||||||
SKIP(1);
|
return(0);
|
||||||
} else {
|
SKIP(1);
|
||||||
htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
|
|
||||||
"Couldn't find end of Start Tag %s\n", name, NULL);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* end of parsing of this node.
|
|
||||||
*/
|
|
||||||
if (xmlStrEqual(name, ctxt->name)) {
|
|
||||||
htmlParserFinishElementParsing(ctxt);
|
|
||||||
nodePop(ctxt);
|
|
||||||
htmlnamePop(ctxt);
|
|
||||||
}
|
|
||||||
return(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for an Empty Element from DTD definition
|
* Check for an Empty Element from DTD definition
|
||||||
@ -4358,10 +4322,6 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
|
|||||||
* Wipe out everything which is before the first '<'
|
* Wipe out everything which is before the first '<'
|
||||||
*/
|
*/
|
||||||
SKIP_BLANKS;
|
SKIP_BLANKS;
|
||||||
if (ctxt->input->cur >= ctxt->input->end) {
|
|
||||||
htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,
|
|
||||||
"Document is empty\n", NULL, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
|
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
|
||||||
ctxt->sax->startDocument(ctxt->userData);
|
ctxt->sax->startDocument(ctxt->userData);
|
||||||
@ -5018,12 +4978,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
* Lookup the info for that element.
|
* Lookup the info for that element.
|
||||||
*/
|
*/
|
||||||
info = htmlTagLookup(name);
|
info = htmlTagLookup(name);
|
||||||
if (info == NULL) {
|
if (info != NULL)
|
||||||
htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
|
|
||||||
"Tag %s invalid\n", name, NULL);
|
|
||||||
} else {
|
|
||||||
ctxt->endCheckState = info->dataMode;
|
ctxt->endCheckState = info->dataMode;
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for an Empty Element labeled the XML/SGML way
|
* Check for an Empty Element labeled the XML/SGML way
|
||||||
@ -5041,28 +4997,9 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (CUR == '>') {
|
if (CUR != '>')
|
||||||
SKIP(1);
|
break;
|
||||||
} else {
|
SKIP(1);
|
||||||
htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
|
|
||||||
"Couldn't find end of Start Tag %s\n",
|
|
||||||
name, NULL);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* end of parsing of this node.
|
|
||||||
*/
|
|
||||||
if (xmlStrEqual(name, ctxt->name)) {
|
|
||||||
htmlParserFinishElementParsing(ctxt);
|
|
||||||
nodePop(ctxt);
|
|
||||||
htmlnamePop(ctxt);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ctxt->record_info)
|
|
||||||
htmlNodeInfoPush(ctxt, &node_info);
|
|
||||||
|
|
||||||
ctxt->instate = XML_PARSER_CONTENT;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for an Empty Element from DTD definition
|
* Check for an Empty Element from DTD definition
|
||||||
|
@ -50,8 +50,7 @@ chunk = "ar</foo>"
|
|||||||
ctxt.htmlParseChunk(chunk, len(chunk), 1)
|
ctxt.htmlParseChunk(chunk, len(chunk), 1)
|
||||||
ctxt=None
|
ctxt=None
|
||||||
|
|
||||||
reference = """startDocument:startElement html None:startElement body None:startElement foo {'url': 'tst'}:error: Tag foo invalid
|
reference = """startDocument:startElement html None:startElement body None:startElement foo {'url': 'tst'}:characters: bar:endElement foo:endElement body:endElement html:endDocument:"""
|
||||||
:characters: bar:endElement foo:endElement body:endElement html:endDocument:"""
|
|
||||||
if log != reference:
|
if log != reference:
|
||||||
print("Error got: %s" % log)
|
print("Error got: %s" % log)
|
||||||
print("Exprected: %s" % reference)
|
print("Exprected: %s" % reference)
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
./test/HTML/names.html:3: HTML parser error : Tag o:p invalid
|
|
||||||
<o:p></o:p>
|
|
||||||
^
|
|
@ -7,7 +7,6 @@ SAX.startElement(body)
|
|||||||
SAX.characters(
|
SAX.characters(
|
||||||
, 3)
|
, 3)
|
||||||
SAX.startElement(o:p)
|
SAX.startElement(o:p)
|
||||||
SAX.error: Tag o:p invalid
|
|
||||||
SAX.endElement(o:p)
|
SAX.endElement(o:p)
|
||||||
SAX.characters(
|
SAX.characters(
|
||||||
, 1)
|
, 1)
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
./test/HTML/utf8bug.html:121: HTML parser error : Tag s1 invalid
|
|
||||||
ز همکاران است. روی آن کلیک کند.</FONT></FONT></STRONG><S1
|
|
||||||
^
|
|
@ -422,7 +422,6 @@ SAX.endElement(font)
|
|||||||
SAX.endElement(font)
|
SAX.endElement(font)
|
||||||
SAX.endElement(strong)
|
SAX.endElement(strong)
|
||||||
SAX.startElement(s1)
|
SAX.startElement(s1)
|
||||||
SAX.error: Tag s1 invalid
|
|
||||||
SAX.characters( , 2)
|
SAX.characters( , 2)
|
||||||
SAX.endElement(s1)
|
SAX.endElement(s1)
|
||||||
SAX.endElement(div)
|
SAX.endElement(div)
|
||||||
|
@ -1,6 +1,3 @@
|
|||||||
./test/HTML/wired.html:25: HTML parser error : Tag nobr invalid
|
|
||||||
<td bgcolor="#FF0000" align="left" valign="center"><nobr><img src="http://static
|
|
||||||
^
|
|
||||||
./test/HTML/wired.html:125: HTML parser error : Unexpected end tag : form
|
./test/HTML/wired.html:125: HTML parser error : Unexpected end tag : form
|
||||||
</tr> </form>
|
</tr> </form>
|
||||||
^
|
^
|
||||||
|
@ -357,7 +357,6 @@ SAX.characters(
|
|||||||
, 3)
|
, 3)
|
||||||
SAX.startElement(td, bgcolor='#FF0000', align='left', valign='center')
|
SAX.startElement(td, bgcolor='#FF0000', align='left', valign='center')
|
||||||
SAX.startElement(nobr)
|
SAX.startElement(nobr)
|
||||||
SAX.error: Tag nobr invalid
|
|
||||||
SAX.startElement(img, src='http://static.wired.com/news/images/spacer.gif', width='344', height='1')
|
SAX.startElement(img, src='http://static.wired.com/news/images/spacer.gif', width='344', height='1')
|
||||||
SAX.endElement(img)
|
SAX.endElement(img)
|
||||||
SAX.startElement(br)
|
SAX.startElement(br)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user