mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-04-24 18:50:07 +03:00
html: Stop reporting syntax errors
It doesn't make much sense to keep the old syntax error handling, which doesn't conform to HTML5. Handling HTML5 parser errors is rather involved and not essential for parsers.
This commit is contained in:
parent
c6af101728
commit
e179f3ec0e
87
HTMLparser.c
87
HTMLparser.c
@ -2958,28 +2958,18 @@ htmlParseAttValue(htmlParserCtxtPtr ctxt) {
|
||||
if (CUR == '"') {
|
||||
SKIP(1);
|
||||
ret = htmlParseHTMLAttribute(ctxt, '"');
|
||||
if (CUR != '"') {
|
||||
htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
|
||||
"AttValue: \" expected\n", NULL, NULL);
|
||||
} else
|
||||
if (CUR == '"')
|
||||
SKIP(1);
|
||||
} else if (CUR == '\'') {
|
||||
SKIP(1);
|
||||
ret = htmlParseHTMLAttribute(ctxt, '\'');
|
||||
if (CUR != '\'') {
|
||||
htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
|
||||
"AttValue: ' expected\n", NULL, NULL);
|
||||
} else
|
||||
if (CUR == '\'')
|
||||
SKIP(1);
|
||||
} else {
|
||||
/*
|
||||
* That's an HTMLism, the attribute value may not be quoted
|
||||
*/
|
||||
ret = htmlParseHTMLAttribute(ctxt, 0);
|
||||
if (ret == NULL) {
|
||||
htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
|
||||
"AttValue: no value found\n", NULL, NULL);
|
||||
}
|
||||
}
|
||||
return(ret);
|
||||
}
|
||||
@ -3561,11 +3551,8 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
|
||||
|
||||
*value = NULL;
|
||||
name = htmlParseHTMLName(ctxt, 1);
|
||||
if (name == NULL) {
|
||||
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
|
||||
"error parsing attribute name\n", NULL, NULL);
|
||||
if (name == NULL)
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* read the value
|
||||
@ -3702,15 +3689,12 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
||||
|
||||
GROW;
|
||||
name = htmlParseHTMLName(ctxt, 0);
|
||||
if (name == NULL) {
|
||||
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
|
||||
"htmlParseStartTag: invalid element name\n",
|
||||
NULL, NULL);
|
||||
if (name == NULL)
|
||||
return -1;
|
||||
}
|
||||
if (xmlStrEqual(name, BAD_CAST"meta"))
|
||||
meta = 1;
|
||||
|
||||
if ((ctxt->options & HTML_PARSE_HTML5) == 0) {
|
||||
/*
|
||||
* Check for auto-closure of HTML elements.
|
||||
*/
|
||||
@ -3752,6 +3736,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Now parse the attributes, it ends up with the ending
|
||||
@ -3778,8 +3763,6 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
||||
*/
|
||||
for (i = 0; i < nbatts;i += 2) {
|
||||
if (xmlStrEqual(atts[i], attname)) {
|
||||
htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED,
|
||||
"Attribute %s redefined\n", attname, NULL);
|
||||
if (attvalue != NULL)
|
||||
xmlFree(attvalue);
|
||||
goto failed;
|
||||
@ -3894,8 +3877,6 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
||||
int i, ret;
|
||||
|
||||
if ((CUR != '<') || (NXT(1) != '/')) {
|
||||
htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,
|
||||
"htmlParseEndTag: '</' not found\n", NULL, NULL);
|
||||
return (0);
|
||||
}
|
||||
SKIP(2);
|
||||
@ -4177,12 +4158,8 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
|
||||
* Lookup the info for that element.
|
||||
*/
|
||||
info = htmlTagLookup(name);
|
||||
if (info == NULL) {
|
||||
htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
|
||||
"Tag %s invalid\n", name, NULL);
|
||||
} else {
|
||||
if (info != NULL)
|
||||
ctxt->endCheckState = info->dataMode;
|
||||
}
|
||||
|
||||
if (ctxt->record_info)
|
||||
htmlNodeInfoPush(ctxt, &node_info);
|
||||
@ -4201,22 +4178,9 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
|
||||
return(0);
|
||||
}
|
||||
|
||||
if (CUR == '>') {
|
||||
SKIP(1);
|
||||
} else {
|
||||
htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
|
||||
"Couldn't find end of Start Tag %s\n", name, NULL);
|
||||
|
||||
/*
|
||||
* end of parsing of this node.
|
||||
*/
|
||||
if (xmlStrEqual(name, ctxt->name)) {
|
||||
htmlParserFinishElementParsing(ctxt);
|
||||
nodePop(ctxt);
|
||||
htmlnamePop(ctxt);
|
||||
}
|
||||
if (CUR != '>')
|
||||
return(0);
|
||||
}
|
||||
SKIP(1);
|
||||
|
||||
/*
|
||||
* Check for an Empty Element from DTD definition
|
||||
@ -4358,10 +4322,6 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
|
||||
* Wipe out everything which is before the first '<'
|
||||
*/
|
||||
SKIP_BLANKS;
|
||||
if (ctxt->input->cur >= ctxt->input->end) {
|
||||
htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,
|
||||
"Document is empty\n", NULL, NULL);
|
||||
}
|
||||
|
||||
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
|
||||
ctxt->sax->startDocument(ctxt->userData);
|
||||
@ -5018,12 +4978,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
||||
* Lookup the info for that element.
|
||||
*/
|
||||
info = htmlTagLookup(name);
|
||||
if (info == NULL) {
|
||||
htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
|
||||
"Tag %s invalid\n", name, NULL);
|
||||
} else {
|
||||
if (info != NULL)
|
||||
ctxt->endCheckState = info->dataMode;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for an Empty Element labeled the XML/SGML way
|
||||
@ -5041,28 +4997,9 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (CUR == '>') {
|
||||
SKIP(1);
|
||||
} else {
|
||||
htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
|
||||
"Couldn't find end of Start Tag %s\n",
|
||||
name, NULL);
|
||||
|
||||
/*
|
||||
* end of parsing of this node.
|
||||
*/
|
||||
if (xmlStrEqual(name, ctxt->name)) {
|
||||
htmlParserFinishElementParsing(ctxt);
|
||||
nodePop(ctxt);
|
||||
htmlnamePop(ctxt);
|
||||
}
|
||||
|
||||
if (ctxt->record_info)
|
||||
htmlNodeInfoPush(ctxt, &node_info);
|
||||
|
||||
ctxt->instate = XML_PARSER_CONTENT;
|
||||
if (CUR != '>')
|
||||
break;
|
||||
}
|
||||
SKIP(1);
|
||||
|
||||
/*
|
||||
* Check for an Empty Element from DTD definition
|
||||
|
@ -50,8 +50,7 @@ chunk = "ar</foo>"
|
||||
ctxt.htmlParseChunk(chunk, len(chunk), 1)
|
||||
ctxt=None
|
||||
|
||||
reference = """startDocument:startElement html None:startElement body None:startElement foo {'url': 'tst'}:error: Tag foo invalid
|
||||
:characters: bar:endElement foo:endElement body:endElement html:endDocument:"""
|
||||
reference = """startDocument:startElement html None:startElement body None:startElement foo {'url': 'tst'}:characters: bar:endElement foo:endElement body:endElement html:endDocument:"""
|
||||
if log != reference:
|
||||
print("Error got: %s" % log)
|
||||
print("Exprected: %s" % reference)
|
||||
|
@ -1,3 +0,0 @@
|
||||
./test/HTML/names.html:3: HTML parser error : Tag o:p invalid
|
||||
<o:p></o:p>
|
||||
^
|
@ -7,7 +7,6 @@ SAX.startElement(body)
|
||||
SAX.characters(
|
||||
, 3)
|
||||
SAX.startElement(o:p)
|
||||
SAX.error: Tag o:p invalid
|
||||
SAX.endElement(o:p)
|
||||
SAX.characters(
|
||||
, 1)
|
||||
|
@ -1,3 +0,0 @@
|
||||
./test/HTML/utf8bug.html:121: HTML parser error : Tag s1 invalid
|
||||
ز همکاران است. روی آن کلیک کند.</FONT></FONT></STRONG><S1
|
||||
^
|
@ -422,7 +422,6 @@ SAX.endElement(font)
|
||||
SAX.endElement(font)
|
||||
SAX.endElement(strong)
|
||||
SAX.startElement(s1)
|
||||
SAX.error: Tag s1 invalid
|
||||
SAX.characters( , 2)
|
||||
SAX.endElement(s1)
|
||||
SAX.endElement(div)
|
||||
|
@ -1,6 +1,3 @@
|
||||
./test/HTML/wired.html:25: HTML parser error : Tag nobr invalid
|
||||
<td bgcolor="#FF0000" align="left" valign="center"><nobr><img src="http://static
|
||||
^
|
||||
./test/HTML/wired.html:125: HTML parser error : Unexpected end tag : form
|
||||
</tr> </form>
|
||||
^
|
||||
|
@ -357,7 +357,6 @@ SAX.characters(
|
||||
, 3)
|
||||
SAX.startElement(td, bgcolor='#FF0000', align='left', valign='center')
|
||||
SAX.startElement(nobr)
|
||||
SAX.error: Tag nobr invalid
|
||||
SAX.startElement(img, src='http://static.wired.com/news/images/spacer.gif', width='344', height='1')
|
||||
SAX.endElement(img)
|
||||
SAX.startElement(br)
|
||||
|
Loading…
x
Reference in New Issue
Block a user