diff --git a/HTMLparser.c b/HTMLparser.c
index d5c8e0e2..a48b2318 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -40,8 +40,6 @@
static int htmlOmittedDefaultValue = 1;
-static void htmlParseComment(htmlParserCtxtPtr ctxt);
-
static int
htmlParseElementInternal(htmlParserCtxtPtr ctxt);
@@ -2545,23 +2543,6 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
-static void
-htmlSkipBogusComment(htmlParserCtxtPtr ctxt) {
- int c;
-
- htmlParseErr(ctxt, XML_HTML_INCORRECTLY_OPENED_COMMENT,
- "Incorrectly opened comment\n", NULL, NULL);
-
- while (PARSER_STOPPED(ctxt) == 0) {
- c = CUR;
- if (c == 0)
- break;
- NEXT;
- if (c == '>')
- break;
- }
-}
-
/**
* htmlParseHTMLName:
* @ctxt: an HTML parser context
@@ -3368,147 +3349,27 @@ htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
return(URI);
}
-/**
- * htmlParsePI:
- * @ctxt: an HTML parser context
- *
- * Parse an XML Processing Instruction. HTML5 doesn't allow processing
- * instructions, so this will be removed at some point.
- */
-static void
-htmlParsePI(htmlParserCtxtPtr ctxt) {
- xmlChar *buf = NULL;
- int len = 0;
- int size = HTML_PARSER_BUFFER_SIZE;
- int cur, l;
- int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
- XML_MAX_HUGE_LENGTH :
- XML_MAX_TEXT_LENGTH;
- const xmlChar *target;
- xmlParserInputState state;
-
- if ((RAW == '<') && (NXT(1) == '?')) {
- state = ctxt->instate;
- ctxt->instate = XML_PARSER_PI;
- /*
- * this is a Processing Instruction.
- */
- SKIP(2);
-
- /*
- * Parse the target name and check for special support like
- * namespace.
- */
- target = htmlParseName(ctxt);
- if (target != NULL) {
- if (RAW == '>') {
- SKIP(1);
-
- /*
- * SAX: PI detected.
- */
- if ((ctxt->sax) && (!ctxt->disableSAX) &&
- (ctxt->sax->processingInstruction != NULL))
- ctxt->sax->processingInstruction(ctxt->userData,
- target, NULL);
- goto done;
- }
- buf = xmlMalloc(size);
- if (buf == NULL) {
- htmlErrMemory(ctxt);
- return;
- }
- cur = CUR;
- if (!IS_BLANK(cur)) {
- htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
- "ParsePI: PI %s space expected\n", target, NULL);
- }
- SKIP_BLANKS;
- cur = CUR_CHAR(l);
- while ((cur != 0) && (cur != '>')) {
- if (len + 5 >= size) {
- xmlChar *tmp;
-
- size *= 2;
- tmp = (xmlChar *) xmlRealloc(buf, size);
- if (tmp == NULL) {
- htmlErrMemory(ctxt);
- xmlFree(buf);
- return;
- }
- buf = tmp;
- }
- if (IS_CHAR(cur)) {
- COPY_BUF(buf,len,cur);
- } else {
- htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
- "Invalid char in processing instruction "
- "0x%X\n", cur);
- }
- if (len > maxLength) {
- htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
- "PI %s too long", target, NULL);
- xmlFree(buf);
- goto done;
- }
- NEXTL(l);
- cur = CUR_CHAR(l);
- }
- buf[len] = 0;
- if (cur != '>') {
- htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
- "ParsePI: PI %s never end ...\n", target, NULL);
- } else {
- SKIP(1);
-
- /*
- * SAX: PI detected.
- */
- if ((ctxt->sax) && (!ctxt->disableSAX) &&
- (ctxt->sax->processingInstruction != NULL))
- ctxt->sax->processingInstruction(ctxt->userData,
- target, buf);
- }
- xmlFree(buf);
- } else {
- htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
- "PI is not started correctly", NULL, NULL);
- }
-
-done:
- ctxt->instate = state;
- }
-}
-
/**
* htmlParseComment:
* @ctxt: an HTML parser context
+ * @bogus: non-zero for a bogus comment, which ends at the first '>'
*
* Parse an HTML comment
*/
static void
-htmlParseComment(htmlParserCtxtPtr ctxt) {
+htmlParseComment(htmlParserCtxtPtr ctxt, int bogus) {
xmlChar *buf = NULL;
int len;
int size = HTML_PARSER_BUFFER_SIZE;
- int q, ql;
- int r, rl;
int cur, l;
- int next, nl;
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
XML_MAX_HUGE_LENGTH :
XML_MAX_TEXT_LENGTH;
xmlParserInputState state;
- /*
- * Check that there is a comment right here.
- */
- if ((RAW != '<') || (NXT(1) != '!') ||
- (NXT(2) != '-') || (NXT(3) != '-')) return;
-
state = ctxt->instate;
ctxt->instate = XML_PARSER_COMMENT;
- SKIP(4);
+
buf = xmlMalloc(size);
if (buf == NULL) {
htmlErrMemory(ctxt);
@@ -3516,36 +3377,34 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
}
len = 0;
buf[len] = 0;
- q = CUR_CHAR(ql);
- if (q == 0)
- goto unfinished;
- if (q == '>') {
- htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
- cur = '>';
- goto finished;
- }
- NEXTL(ql);
- r = CUR_CHAR(rl);
- if (r == 0)
- goto unfinished;
- if (q == '-' && r == '>') {
- htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
- cur = '>';
- goto finished;
- }
- NEXTL(rl);
- cur = CUR_CHAR(l);
- while ((cur != 0) &&
- ((cur != '>') ||
- (r != '-') || (q != '-'))) {
- NEXTL(l);
- next = CUR_CHAR(nl);
- if ((q == '-') && (r == '-') && (cur == '!') && (next == '>')) {
- htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
- "Comment incorrectly closed by '--!>'", NULL, NULL);
- cur = '>';
- break;
+ cur = CUR_CHAR(l);
+ if (!bogus) {
+ if (cur == '>') {
+ SKIP(1);
+ goto done;
+ } else if ((cur == '-') && (NXT(1) == '>')) {
+ SKIP(2);
+ goto done;
+ }
+ }
+
+ while (cur != 0) {
+ if (bogus) {
+ if (cur == '>') {
+ SKIP(1);
+ break;
+ }
+ } else {
+ if ((cur == '-') && (NXT(1) == '-')) {
+ if (NXT(2) == '>') {
+ SKIP(3);
+ break;
+ } else if ((NXT(2) == '!') && (NXT(3) == '>')) {
+ SKIP(4);
+ break;
+ }
+ }
}
if (len + 5 >= size) {
@@ -3556,15 +3415,16 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
if (tmp == NULL) {
xmlFree(buf);
htmlErrMemory(ctxt);
+ ctxt->instate = state;
return;
}
buf = tmp;
}
- if (IS_CHAR(q)) {
- COPY_BUF(buf,len,q);
+ if (IS_CHAR(cur)) {
+ COPY_BUF(buf,len,cur);
} else {
htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
- "Invalid char in comment 0x%X\n", q);
+ "Invalid char in comment 0x%X\n", cur);
}
if (len > maxLength) {
htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
@@ -3574,29 +3434,19 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
return;
}
- q = r;
- ql = rl;
- r = cur;
- rl = l;
- cur = next;
- l = nl;
- }
-finished:
- buf[len] = 0;
- if (cur == '>') {
- SKIP(1);
- if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->comment(ctxt->userData, buf);
- xmlFree(buf);
- ctxt->instate = state;
- return;
+ NEXTL(l);
+ cur = CUR_CHAR(l);
}
-unfinished:
- htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
- "Comment not terminated \n
diff --git a/result/HTML/758518-tag.html.err b/result/HTML/758518-tag.html.err
deleted file mode 100644
index c912c91f..00000000
--- a/result/HTML/758518-tag.html.err
+++ /dev/null
@@ -1,3 +0,0 @@
-./test/HTML/758518-tag.html:1: HTML parser error : PI is not started correctly
-
-^
diff --git a/result/HTML/758518-tag.html.sax b/result/HTML/758518-tag.html.sax
index fd4aa949..d94eb193 100644
--- a/result/HTML/758518-tag.html.sax
+++ b/result/HTML/758518-tag.html.sax
@@ -1,10 +1,4 @@
SAX.setDocumentLocator()
SAX.startDocument()
-SAX.error: PI is not started correctlySAX.startElement(html)
-SAX.startElement(body)
-SAX.startElement(p)
-SAX.characters(, 2)
-SAX.endElement(p)
-SAX.endElement(body)
-SAX.endElement(html)
+SAX.comment(?a)
SAX.endDocument()
diff --git a/result/HTML/758606.html b/result/HTML/758606.html
index 3974ca90..ee62ed7b 100644
--- a/result/HTML/758606.html
+++ b/result/HTML/758606.html
@@ -1,2 +1,3 @@
-
+
diff --git a/result/HTML/758606.html.err b/result/HTML/758606.html.err
index bcb253eb..523b8b8b 100644
--- a/result/HTML/758606.html.err
+++ b/result/HTML/758606.html.err
@@ -1,7 +1,3 @@
./test/HTML/758606.html:1: HTML parser error : Invalid char in comment 0xC
diff --git a/result/HTML/758606_2.html.err b/result/HTML/758606_2.html.err
index 88bcde6b..104a5e43 100644
--- a/result/HTML/758606_2.html.err
+++ b/result/HTML/758606_2.html.err
@@ -1,7 +1,3 @@
./test/HTML/758606_2.html:1: HTML parser error : Invalid char in comment 0xC
-'
- whatwg guidance is
- ^
diff --git a/result/HTML/comments.html.sax b/result/HTML/comments.html.sax
index ee8fcd7b..011c6dc0 100644
--- a/result/HTML/comments.html.sax
+++ b/result/HTML/comments.html.sax
@@ -24,7 +24,7 @@ SAX.characters(
SAX.startElement(div)
SAX.characters(
, 9)
-SAX.error: Comment incorrectly closed by '--!>'SAX.comment(incorrectly closed comment)
+SAX.comment(incorrectly closed comment)
SAX.startElement(span, id='under-test')
SAX.characters(whatwg guidance is that this s, 49)
SAX.endElement(span)
diff --git a/result/HTML/comments2.html.err b/result/HTML/comments2.html.err
deleted file mode 100644
index 8d1f5926..00000000
--- a/result/HTML/comments2.html.err
+++ /dev/null
@@ -1,3 +0,0 @@
-./test/HTML/comments2.html:10: HTML parser error : Comment incorrectly closed by '--!>'
- whatwg guidance is
- ^
diff --git a/result/HTML/comments2.html.sax b/result/HTML/comments2.html.sax
index d694f04f..4958a4f1 100644
--- a/result/HTML/comments2.html.sax
+++ b/result/HTML/comments2.html.sax
@@ -24,7 +24,7 @@ SAX.characters(
SAX.startElement(div)
SAX.characters(
, 9)
-SAX.error: Comment incorrectly closed by '--!>'SAX.comment(incorrectly closed comment)
+SAX.comment(incorrectly closed comment)
SAX.startElement(span, id='under-test')
SAX.characters(whatwg guidance is that this s, 49)
SAX.endElement(span)
diff --git a/result/HTML/comments3.html.err b/result/HTML/comments3.html.err
deleted file mode 100644
index 60e927ba..00000000
--- a/result/HTML/comments3.html.err
+++ /dev/null
@@ -1,6 +0,0 @@
-./test/HTML/comments3.html:10: HTML parser error : Comment abruptly ended
- the previous node should be an empty comment, and this should be a
- ^
-./test/HTML/comments3.html:13: HTML parser error : Comment abruptly ended
- the previous node should be an empty comment, and this should be a
- ^
diff --git a/result/HTML/comments3.html.sax b/result/HTML/comments3.html.sax
index ab783f94..9f40a450 100644
--- a/result/HTML/comments3.html.sax
+++ b/result/HTML/comments3.html.sax
@@ -24,7 +24,7 @@ SAX.characters(
SAX.startElement(div)
SAX.characters(
, 9)
-SAX.error: Comment abruptly endedSAX.comment()
+SAX.comment()
SAX.characters(the previous node should be an, 86)
SAX.endElement(div)
SAX.characters(
@@ -32,7 +32,7 @@ SAX.characters(
SAX.startElement(div)
SAX.characters(
, 9)
-SAX.error: Comment abruptly endedSAX.comment()
+SAX.comment()
SAX.characters(the previous node should be an, 86)
SAX.endElement(div)
SAX.characters(
diff --git a/result/HTML/issue380.html b/result/HTML/issue380.html
index 1fcf4965..13b7db1f 100644
--- a/result/HTML/issue380.html
+++ b/result/HTML/issue380.html
@@ -1,6 +1,6 @@
- ...
+ ...
diff --git a/result/HTML/issue380.html.err b/result/HTML/issue380.html.err
deleted file mode 100644
index efbb8bdf..00000000
--- a/result/HTML/issue380.html.err
+++ /dev/null
@@ -1,6 +0,0 @@
-./test/HTML/issue380.html:3: HTML parser error : Incorrectly opened comment
- ...
- ^
-./test/HTML/issue380.html:3: HTML parser error : Incorrectly opened comment
- ...
- ^
diff --git a/result/HTML/issue380.html.sax b/result/HTML/issue380.html.sax
index 5df2b506..b53845cf 100644
--- a/result/HTML/issue380.html.sax
+++ b/result/HTML/issue380.html.sax
@@ -6,9 +6,9 @@ SAX.characters(
SAX.startElement(body)
SAX.characters(
, 5)
-SAX.error: Incorrectly opened comment
+SAX.comment([if !supportLists])
SAX.characters(..., 3)
-SAX.error: Incorrectly opened comment
+SAX.comment([endif])
SAX.characters(
, 3)
SAX.endElement(body)
diff --git a/result/HTML/python.html b/result/HTML/python.html
index ea0be18e..befe71e0 100644
--- a/result/HTML/python.html
+++ b/result/HTML/python.html
@@ -1,5 +1,5 @@
-
+
Python Programming Language
diff --git a/result/HTML/python.html.sax b/result/HTML/python.html.sax
index ec300d35..f825954f 100644
--- a/result/HTML/python.html.sax
+++ b/result/HTML/python.html.sax
@@ -1,7 +1,7 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.internalSubset(html, -//W3C//DTD HTML 4.01 Transitional//EN, http://www.w3.org/TR/html4/loose.dtd)
-SAX.processingInstruction(xml-stylesheet, href="./css/ht2html.css" type="text/css"?)
+SAX.comment(?xml-stylesheet href="./css/ht2html.css" type="text/css"?)
SAX.startElement(html)
SAX.characters(
, 1)
diff --git a/result/HTML/xml-declaration-1.html b/result/HTML/xml-declaration-1.html
index 8c9ebe39..1d0ca6c0 100644
--- a/result/HTML/xml-declaration-1.html
+++ b/result/HTML/xml-declaration-1.html
@@ -1,4 +1,4 @@
-
+
öäüß
diff --git a/result/HTML/xml-declaration-1.html.sax b/result/HTML/xml-declaration-1.html.sax
index 83fe8eb6..c1ce23ee 100644
--- a/result/HTML/xml-declaration-1.html.sax
+++ b/result/HTML/xml-declaration-1.html.sax
@@ -1,6 +1,6 @@
SAX.setDocumentLocator()
SAX.startDocument()
-SAX.processingInstruction(xml, encoding="UTF-8")
+SAX.comment(?xml encoding="UTF-8")
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)