diff --git a/parserInternals.c b/parserInternals.c index f09142b8..0270f06e 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -709,16 +709,6 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { } return((int) *ctxt->input->cur); encoding_error: - /* - * An encoding problem may arise from a truncated input buffer - * splitting a character in the middle. In that case do not raise - * an error but return 0 to endicate an end of stream problem - */ - if (ctxt->input->end - ctxt->input->cur < 4) { - *len = 0; - return(0); - } - /* * If we detect an UTF8 error that probably mean that the * input encoding didn't get properly advertised in the @@ -729,9 +719,21 @@ encoding_error: { char buffer[150]; - snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - ctxt->input->cur[0], ctxt->input->cur[1], - ctxt->input->cur[2], ctxt->input->cur[3]); + if (ctxt->input->cur[1] == 0) { + snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n", + ctxt->input->cur[0]); + } else if (ctxt->input->cur[2] == 0) { + snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n", + ctxt->input->cur[0], ctxt->input->cur[1]); + } else if (ctxt->input->cur[3] == 0) { + snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n", + ctxt->input->cur[0], ctxt->input->cur[1], + ctxt->input->cur[2]); + } else { + snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + ctxt->input->cur[0], ctxt->input->cur[1], + ctxt->input->cur[2], ctxt->input->cur[3]); + } __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, "Input is not proper UTF-8, indicate encoding !\n%s", BAD_CAST buffer, NULL); @@ -821,17 +823,6 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) *len = 1; return ((int) *cur); encoding_error: - - /* - * An encoding problem may arise from a truncated input buffer - * splitting a character in the middle. In that case do not raise - * an error but return 0 to endicate an end of stream problem - */ - if ((ctxt == NULL) || (ctxt->input == NULL) || - (ctxt->input->end - ctxt->input->cur < 4)) { - *len = 0; - return(0); - } /* * If we detect an UTF8 error that probably mean that the * input encoding didn't get properly advertised in the @@ -842,9 +833,19 @@ encoding_error: { char buffer[150]; - snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - ctxt->input->cur[0], ctxt->input->cur[1], - ctxt->input->cur[2], ctxt->input->cur[3]); + if (cur[1] == 0) { + snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n", + cur[0]); + } else if (cur[2] == 0) { + snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n", + cur[0], cur[1]); + } else if (cur[3] == 0) { + snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n", + cur[0], cur[1], cur[2]); + } else { + snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + cur[0], cur[1], cur[2], cur[3]); + } __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, "Input is not proper UTF-8, indicate encoding !\n%s", BAD_CAST buffer, NULL); diff --git a/result/errors/partial_utf8_1.xml b/result/errors/partial_utf8_1.xml new file mode 100644 index 00000000..e69de29b diff --git a/result/errors/partial_utf8_1.xml.err b/result/errors/partial_utf8_1.xml.err new file mode 100644 index 00000000..544594f2 --- /dev/null +++ b/result/errors/partial_utf8_1.xml.err @@ -0,0 +1,7 @@ +./test/errors/partial_utf8_1.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! +Bytes: 0xC2 EOF +Â + ^ +./test/errors/partial_utf8_1.xml:1: parser error : Premature end of data in tag a line 1 +Â + ^ diff --git a/result/errors/partial_utf8_1.xml.str b/result/errors/partial_utf8_1.xml.str new file mode 100644 index 00000000..8b0cb579 --- /dev/null +++ b/result/errors/partial_utf8_1.xml.str @@ -0,0 +1,4 @@ +./test/errors/partial_utf8_1.xml:1: parser error : Extra content at the end of the document +Â + ^ +./test/errors/partial_utf8_1.xml : failed to parse diff --git a/result/errors/partial_utf8_2.xml b/result/errors/partial_utf8_2.xml new file mode 100644 index 00000000..e69de29b diff --git a/result/errors/partial_utf8_2.xml.err b/result/errors/partial_utf8_2.xml.err new file mode 100644 index 00000000..b205821b --- /dev/null +++ b/result/errors/partial_utf8_2.xml.err @@ -0,0 +1,7 @@ +./test/errors/partial_utf8_2.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! +Bytes: 0xE3 0xA0 EOF +ã  + ^ +./test/errors/partial_utf8_2.xml:1: parser error : Premature end of data in tag a line 1 +ã  + ^ diff --git a/result/errors/partial_utf8_2.xml.str b/result/errors/partial_utf8_2.xml.str new file mode 100644 index 00000000..a91e904d --- /dev/null +++ b/result/errors/partial_utf8_2.xml.str @@ -0,0 +1,5 @@ +./test/errors/partial_utf8_2.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! +Bytes: 0xE3 0xA0 EOF +ã  + ^ +./test/errors/partial_utf8_2.xml : failed to parse diff --git a/result/errors/partial_utf8_3.xml b/result/errors/partial_utf8_3.xml new file mode 100644 index 00000000..e69de29b diff --git a/result/errors/partial_utf8_3.xml.err b/result/errors/partial_utf8_3.xml.err new file mode 100644 index 00000000..111ac111 --- /dev/null +++ b/result/errors/partial_utf8_3.xml.err @@ -0,0 +1,7 @@ +./test/errors/partial_utf8_3.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! +Bytes: 0xF2 0xA0 0xA0 EOF +ò   + ^ +./test/errors/partial_utf8_3.xml:1: parser error : Premature end of data in tag a line 1 +ò   + ^ diff --git a/result/errors/partial_utf8_3.xml.str b/result/errors/partial_utf8_3.xml.str new file mode 100644 index 00000000..2060852e --- /dev/null +++ b/result/errors/partial_utf8_3.xml.str @@ -0,0 +1,5 @@ +./test/errors/partial_utf8_3.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! +Bytes: 0xF2 0xA0 0xA0 EOF +ò   + ^ +./test/errors/partial_utf8_3.xml : failed to parse diff --git a/test/errors/partial_utf8_1.xml b/test/errors/partial_utf8_1.xml new file mode 100644 index 00000000..f8596206 --- /dev/null +++ b/test/errors/partial_utf8_1.xml @@ -0,0 +1 @@ +Â \ No newline at end of file diff --git a/test/errors/partial_utf8_2.xml b/test/errors/partial_utf8_2.xml new file mode 100644 index 00000000..dbc2d817 --- /dev/null +++ b/test/errors/partial_utf8_2.xml @@ -0,0 +1 @@ +ã  \ No newline at end of file diff --git a/test/errors/partial_utf8_3.xml b/test/errors/partial_utf8_3.xml new file mode 100644 index 00000000..0abd9de8 --- /dev/null +++ b/test/errors/partial_utf8_3.xml @@ -0,0 +1 @@ +ò   \ No newline at end of file