From a80f8b64a920e36518098b44976a7194704a1200 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Thu, 4 May 2023 15:59:31 +0200 Subject: [PATCH] html: Allow attributes in end tags Attributes are syntactically allowed in HTML5 end tags but otherwise ignored. --- HTMLparser.c | 35 +++++++++++++++++++++++++++-------- result/HTML/doc3.htm.err | 3 --- result/HTML/doc3.htm.sax | 1 - result/HTML/wired.html.err | 3 --- result/HTML/wired.html.sax | 1 - 5 files changed, 27 insertions(+), 16 deletions(-) diff --git a/HTMLparser.c b/HTMLparser.c index 6a21d5a0..5ced00ee 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -3980,20 +3980,39 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt) name = htmlParseHTMLName(ctxt, 0); if (name == NULL) return (0); + /* - * We should definitely be at the ending "S? '>'" part + * Parse and ignore attributes. */ SKIP_BLANKS; - if (CUR != '>') { + while ((CUR != 0) && + (CUR != '>') && + ((CUR != '/') || (NXT(1) != '>')) && + (ctxt->instate != XML_PARSER_EOF)) { + xmlChar *attvalue = NULL; + + /* unexpected-solidus-in-tag */ + if (CUR == '/') { + NEXT; + SKIP_BLANKS; + continue; + } + GROW; + htmlParseAttribute(ctxt, &attvalue); + if (attvalue != NULL) + xmlFree(attvalue); + + SKIP_BLANKS; + } + + if (CUR == '>') { + NEXT; + } else if ((CUR == '/') && (NXT(1) == '>')) { + SKIP(2); + } else { htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, "End tag : expected '>'\n", NULL, NULL); - /* Skip to next '>' */ - while ((PARSER_STOPPED(ctxt) == 0) && - (CUR != 0) && (CUR != '>')) - NEXT; } - if (CUR == '>') - NEXT; /* * if we ignored misplaced tags in htmlParseStartTag don't pop them diff --git a/result/HTML/doc3.htm.err b/result/HTML/doc3.htm.err index bb9204eb..7b7c5749 100644 --- a/result/HTML/doc3.htm.err +++ b/result/HTML/doc3.htm.err @@ -40,9 +40,6 @@ om/ad_static.asp?pid=2097&sid=1881&asid=7708"> ./test/HTML/doc3.htm:795: HTML parser error : Unexpected end tag : iframe document.write("42DF8478957377>"); ^ -./test/HTML/doc3.htm:804: HTML parser error : End tag : 
expected '>' - document.write("RIPT>"); - ^ ./test/HTML/doc3.htm:804: HTML parser error : Unexpected end tag : sc"); document.write("RIPT>"); ^ diff --git a/result/HTML/doc3.htm.sax b/result/HTML/doc3.htm.sax index f2009a95..5545f2d7 100644 --- a/result/HTML/doc3.htm.sax +++ b/result/HTML/doc3.htm.sax @@ -2696,7 +2696,6 @@ SAX.cdata( SAX.error: Unexpected end tag : iframe SAX.cdata("); } else if ((parseI, 463) -SAX.error: End tag : expected '>' SAX.error: Unexpected end tag : sc"); SAX.cdata("); } else { diff --git a/result/HTML/wired.html.err b/result/HTML/wired.html.err index 6b8d8184..244b98ed 100644 --- a/result/HTML/wired.html.err +++ b/result/HTML/wired.html.err @@ -217,9 +217,6 @@ wired.com&BANNER=Sprint" style="text-decoration:none">Spri ./test/HTML/wired.html:408: HTML parser error : Opening and ending tag mismatch: a and font com&BANNER=Sprint" style="text-decoration:none">Sprint ^ -./test/HTML/wired.html:408: HTML parser error : End tag : expected '>' -Sprint" style="text-decoration:none">SprintSprint ^ diff --git a/result/HTML/wired.html.sax b/result/HTML/wired.html.sax index ce51ed60..54f4a9a6 100644 --- a/result/HTML/wired.html.sax +++ b/result/HTML/wired.html.sax @@ -1960,7 +1960,6 @@ SAX.error: Opening and ending tag mismatch: a and font SAX.endElement(font) SAX.endElement(a) SAX.endElement(i) -SAX.error: End tag : expected '>' SAX.error: Unexpected end tag : font< SAX.startElement(br) SAX.endElement(br)