diff --git a/parser.c b/parser.c index 9a7135fd..10053951 100644 --- a/parser.c +++ b/parser.c @@ -4586,7 +4586,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { } } } - if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) { + if ((cur != 0) && (!IS_CHAR(cur))) { /* Generate the error and skip the offending character */ xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, "PCDATA invalid Char value %d\n", diff --git a/parserInternals.c b/parserInternals.c index 81b0e0f2..43a0f5ac 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -700,6 +700,16 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { } return((int) *ctxt->input->cur); encoding_error: + /* + * An encoding problem may arise from a truncated input buffer + * splitting a character in the middle. In that case do not raise + * an error but return 0 to indicate an end of stream problem + */ + if (ctxt->input->end - ctxt->input->cur < 4) { + *len = 0; + return(0); + } + /* * If we detect an UTF8 error that probably mean that the * input encoding didn't get properly advertised in the @@ -710,21 +720,9 @@ encoding_error: { char buffer[150]; - if (ctxt->input->cur[1] == 0) { - snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n", - ctxt->input->cur[0]); - } else if (ctxt->input->cur[2] == 0) { - snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n", - ctxt->input->cur[0], ctxt->input->cur[1]); - } else if (ctxt->input->cur[3] == 0) { - snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n", - ctxt->input->cur[0], ctxt->input->cur[1], - ctxt->input->cur[2]); - } else { - snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - ctxt->input->cur[0], ctxt->input->cur[1], - ctxt->input->cur[2], ctxt->input->cur[3]); - } + snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + ctxt->input->cur[0], ctxt->input->cur[1], + ctxt->input->cur[2], ctxt->input->cur[3]); __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, "Input is not proper UTF-8, indicate encoding !\n%s", BAD_CAST 
buffer, NULL); @@ -814,6 +812,17 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) *len = 1; return ((int) *cur); encoding_error: + + /* + * An encoding problem may arise from a truncated input buffer + * splitting a character in the middle. In that case do not raise + * an error but return 0 to indicate an end of stream problem + */ + if ((ctxt == NULL) || (ctxt->input == NULL) || + (ctxt->input->end - ctxt->input->cur < 4)) { + *len = 0; + return(0); + } /* * If we detect an UTF8 error that probably mean that the * input encoding didn't get properly advertised in the @@ -824,19 +833,9 @@ encoding_error: { char buffer[150]; - if (cur[1] == 0) { - snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n", - cur[0]); - } else if (cur[2] == 0) { - snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n", - cur[0], cur[1]); - } else if (cur[3] == 0) { - snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n", - cur[0], cur[1], cur[2]); - } else { - snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - cur[0], cur[1], cur[2], cur[3]); - } + snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + ctxt->input->cur[0], ctxt->input->cur[1], + ctxt->input->cur[2], ctxt->input->cur[3]); __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, "Input is not proper UTF-8, indicate encoding !\n%s", BAD_CAST buffer, NULL); diff --git a/result/errors/partial_utf8_1.xml b/result/errors/partial_utf8_1.xml deleted file mode 100644 index e69de29b..00000000 diff --git a/result/errors/partial_utf8_1.xml.err b/result/errors/partial_utf8_1.xml.err deleted file mode 100644 index 544594f2..00000000 --- a/result/errors/partial_utf8_1.xml.err +++ /dev/null @@ -1,7 +0,0 @@ -./test/errors/partial_utf8_1.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! 
-Bytes: 0xC2 EOF -Â - ^ -./test/errors/partial_utf8_1.xml:1: parser error : Premature end of data in tag a line 1 -Â - ^ diff --git a/result/errors/partial_utf8_1.xml.str b/result/errors/partial_utf8_1.xml.str deleted file mode 100644 index 8b0cb579..00000000 --- a/result/errors/partial_utf8_1.xml.str +++ /dev/null @@ -1,4 +0,0 @@ -./test/errors/partial_utf8_1.xml:1: parser error : Extra content at the end of the document -Â - ^ -./test/errors/partial_utf8_1.xml : failed to parse diff --git a/result/errors/partial_utf8_2.xml b/result/errors/partial_utf8_2.xml deleted file mode 100644 index e69de29b..00000000 diff --git a/result/errors/partial_utf8_2.xml.err b/result/errors/partial_utf8_2.xml.err deleted file mode 100644 index b205821b..00000000 --- a/result/errors/partial_utf8_2.xml.err +++ /dev/null @@ -1,7 +0,0 @@ -./test/errors/partial_utf8_2.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! -Bytes: 0xE3 0xA0 EOF -ã  - ^ -./test/errors/partial_utf8_2.xml:1: parser error : Premature end of data in tag a line 1 -ã  - ^ diff --git a/result/errors/partial_utf8_2.xml.str b/result/errors/partial_utf8_2.xml.str deleted file mode 100644 index a91e904d..00000000 --- a/result/errors/partial_utf8_2.xml.str +++ /dev/null @@ -1,5 +0,0 @@ -./test/errors/partial_utf8_2.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! -Bytes: 0xE3 0xA0 EOF -ã  - ^ -./test/errors/partial_utf8_2.xml : failed to parse diff --git a/result/errors/partial_utf8_3.xml b/result/errors/partial_utf8_3.xml deleted file mode 100644 index e69de29b..00000000 diff --git a/result/errors/partial_utf8_3.xml.err b/result/errors/partial_utf8_3.xml.err deleted file mode 100644 index 111ac111..00000000 --- a/result/errors/partial_utf8_3.xml.err +++ /dev/null @@ -1,7 +0,0 @@ -./test/errors/partial_utf8_3.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! 
-Bytes: 0xF2 0xA0 0xA0 EOF -ò   - ^ -./test/errors/partial_utf8_3.xml:1: parser error : Premature end of data in tag a line 1 -ò   - ^ diff --git a/result/errors/partial_utf8_3.xml.str b/result/errors/partial_utf8_3.xml.str deleted file mode 100644 index 2060852e..00000000 --- a/result/errors/partial_utf8_3.xml.str +++ /dev/null @@ -1,5 +0,0 @@ -./test/errors/partial_utf8_3.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! -Bytes: 0xF2 0xA0 0xA0 EOF -ò   - ^ -./test/errors/partial_utf8_3.xml : failed to parse diff --git a/test/errors/partial_utf8_1.xml b/test/errors/partial_utf8_1.xml deleted file mode 100644 index f8596206..00000000 --- a/test/errors/partial_utf8_1.xml +++ /dev/null @@ -1 +0,0 @@ -Â \ No newline at end of file diff --git a/test/errors/partial_utf8_2.xml b/test/errors/partial_utf8_2.xml deleted file mode 100644 index dbc2d817..00000000 --- a/test/errors/partial_utf8_2.xml +++ /dev/null @@ -1 +0,0 @@ -ã  \ No newline at end of file diff --git a/test/errors/partial_utf8_3.xml b/test/errors/partial_utf8_3.xml deleted file mode 100644 index 0abd9de8..00000000 --- a/test/errors/partial_utf8_3.xml +++ /dev/null @@ -1 +0,0 @@ -ò   \ No newline at end of file