From 4f8606c13cb7f2684839f850b83de5ce647d3ca7 Mon Sep 17 00:00:00 2001 From: David Kilzer Date: Tue, 5 Jan 2016 13:38:09 -0800 Subject: [PATCH] Bug 760183: REGRESSION (v2.9.3): XML push parser fails with bogus UTF-8 encoding error when multi-byte character in large CDATA section is split across buffer * parser.c: (xmlCheckCdataPush): Add 'complete' argument to describe whether the buffer passed in is the whole CDATA buffer, or if there is more data to parse. If there is more data to parse, don't return a negative value for an invalid multi-byte UTF-8 character that is split between buffers. (xmlParseTryOrFinish): Pass 'complete' argument to xmlCheckCdataPush() as appropriate. * result/cdata-2-byte-UTF-8.xml: Added. * result/cdata-2-byte-UTF-8.xml.rde: Added. * result/cdata-2-byte-UTF-8.xml.rdr: Added. * result/cdata-2-byte-UTF-8.xml.sax: Added. * result/cdata-2-byte-UTF-8.xml.sax2: Added. * result/cdata-3-byte-UTF-8.xml: Added. * result/cdata-3-byte-UTF-8.xml.rde: Added. * result/cdata-3-byte-UTF-8.xml.rdr: Added. * result/cdata-3-byte-UTF-8.xml.sax: Added. * result/cdata-3-byte-UTF-8.xml.sax2: Added. * result/cdata-4-byte-UTF-8.xml: Added. * result/cdata-4-byte-UTF-8.xml.rde: Added. * result/cdata-4-byte-UTF-8.xml.rdr: Added. * result/cdata-4-byte-UTF-8.xml.sax: Added. * result/cdata-4-byte-UTF-8.xml.sax2: Added. * result/noent/cdata-2-byte-UTF-8.xml: Added. * result/noent/cdata-3-byte-UTF-8.xml: Added. * result/noent/cdata-4-byte-UTF-8.xml: Added. * test/cdata-2-byte-UTF-8.xml: Added. * test/cdata-3-byte-UTF-8.xml: Added. * test/cdata-4-byte-UTF-8.xml: Added. - Add tests and results. Only 'make Readertests XMLPushtests' fails prior to the fix. --- parser.c | 15 ++++++++------- result/cdata-2-byte-UTF-8.xml | 6 ++++++ result/cdata-2-byte-UTF-8.xml.rde | 15 +++++++++++++++ result/cdata-2-byte-UTF-8.xml.rdr | 15 +++++++++++++++ result/cdata-2-byte-UTF-8.xml.sax | 18 ++++++++++++++++++ result/cdata-2-byte-UTF-8.xml.sax2 | 18 ++++++++++++++++++ result/cdata-3-byte-UTF-8.xml | 7 +++++++ result/cdata-3-byte-UTF-8.xml.rde | 20 ++++++++++++++++++++ result/cdata-3-byte-UTF-8.xml.rdr | 20 ++++++++++++++++++++ result/cdata-3-byte-UTF-8.xml.sax | 23 +++++++++++++++++++++++ result/cdata-3-byte-UTF-8.xml.sax2 | 23 +++++++++++++++++++++++ result/cdata-4-byte-UTF-8.xml | 8 ++++++++ result/cdata-4-byte-UTF-8.xml.rde | 25 +++++++++++++++++++++++++ result/cdata-4-byte-UTF-8.xml.rdr | 25 +++++++++++++++++++++++++ result/cdata-4-byte-UTF-8.xml.sax | 28 ++++++++++++++++++++++++++++ result/cdata-4-byte-UTF-8.xml.sax2 | 28 ++++++++++++++++++++++++++++ result/noent/cdata-2-byte-UTF-8.xml | 6 ++++++ result/noent/cdata-3-byte-UTF-8.xml | 7 +++++++ result/noent/cdata-4-byte-UTF-8.xml | 8 ++++++++ test/cdata-2-byte-UTF-8.xml | 6 ++++++ test/cdata-3-byte-UTF-8.xml | 7 +++++++ test/cdata-4-byte-UTF-8.xml | 8 ++++++++ 22 files changed, 329 insertions(+), 7 deletions(-) create mode 100644 result/cdata-2-byte-UTF-8.xml create mode 100644 result/cdata-2-byte-UTF-8.xml.rde create mode 100644 result/cdata-2-byte-UTF-8.xml.rdr create mode 100644 result/cdata-2-byte-UTF-8.xml.sax create mode 100644 result/cdata-2-byte-UTF-8.xml.sax2 create mode 100644 result/cdata-3-byte-UTF-8.xml create mode 100644 result/cdata-3-byte-UTF-8.xml.rde create mode 100644 result/cdata-3-byte-UTF-8.xml.rdr create mode 100644 result/cdata-3-byte-UTF-8.xml.sax create mode 100644 result/cdata-3-byte-UTF-8.xml.sax2 create mode 100644 result/cdata-4-byte-UTF-8.xml create mode 100644 result/cdata-4-byte-UTF-8.xml.rde create mode 100644 result/cdata-4-byte-UTF-8.xml.rdr create mode 100644 result/cdata-4-byte-UTF-8.xml.sax create mode 100644 result/cdata-4-byte-UTF-8.xml.sax2 create mode 100644 result/noent/cdata-2-byte-UTF-8.xml create mode 100644 result/noent/cdata-3-byte-UTF-8.xml create mode 100644 result/noent/cdata-4-byte-UTF-8.xml create mode 100644 test/cdata-2-byte-UTF-8.xml create mode 100644 test/cdata-3-byte-UTF-8.xml create mode 100644 test/cdata-4-byte-UTF-8.xml diff --git a/parser.c b/parser.c index 0677030c..9604a721 100644 --- a/parser.c +++ b/parser.c @@ -11210,8 +11210,9 @@ xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, } /** * xmlCheckCdataPush: - * @cur: pointer to the bock of characters + * @cur: pointer to the block of characters * @len: length of the block in bytes + * @complete: 1 if complete CDATA block is passed in, 0 if partial block * * Check that the block of characters is okay as SCdata content [20] * @@ -11219,7 +11220,7 @@ xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, * UTF-8 error occured otherwise */ static int -xmlCheckCdataPush(const xmlChar *utf, int len) { +xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { int ix; unsigned char c; int codepoint; @@ -11237,7 +11238,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) { else return(-ix); } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ - if (ix + 2 > len) return(-ix); + if (ix + 2 > len) return(complete ? -ix : ix); if ((utf[ix+1] & 0xc0 ) != 0x80) return(-ix); codepoint = (utf[ix] & 0x1f) << 6; @@ -11246,7 +11247,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) { return(-ix); ix += 2; } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ - if (ix + 3 > len) return(-ix); + if (ix + 3 > len) return(complete ? -ix : ix); if (((utf[ix+1] & 0xc0) != 0x80) || ((utf[ix+2] & 0xc0) != 0x80)) return(-ix); @@ -11257,7 +11258,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) { return(-ix); ix += 3; } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ - if (ix + 4 > len) return(-ix); + if (ix + 4 > len) return(complete ? -ix : ix); if (((utf[ix+1] & 0xc0) != 0x80) || ((utf[ix+2] & 0xc0) != 0x80) || ((utf[ix+3] & 0xc0) != 0x80)) @@ -11772,7 +11773,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { int tmp; tmp = xmlCheckCdataPush(ctxt->input->cur, - XML_PARSER_BIG_BUFFER_SIZE); + XML_PARSER_BIG_BUFFER_SIZE, 0); if (tmp < 0) { tmp = -tmp; ctxt->input->cur += tmp; @@ -11795,7 +11796,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { } else { int tmp; - tmp = xmlCheckCdataPush(ctxt->input->cur, base); + tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1); if ((tmp < 0) || (tmp != base)) { tmp = -tmp; ctxt->input->cur += tmp; diff --git a/result/cdata-2-byte-UTF-8.xml b/result/cdata-2-byte-UTF-8.xml new file mode 100644 index 00000000..8552efc2 --- /dev/null +++ b/result/cdata-2-byte-UTF-8.xml @@ -0,0 +1,6 @@ + + + +

+

+
diff --git a/result/cdata-2-byte-UTF-8.xml.rde b/result/cdata-2-byte-UTF-8.xml.rde new file mode 100644 index 00000000..2eb29403 --- /dev/null +++ b/result/cdata-2-byte-UTF-8.xml.rde @@ -0,0 +1,15 @@ +0 8 #comment 0 1 This tests that two-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 ČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČ +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 ČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČ +1 15 p 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/cdata-2-byte-UTF-8.xml.rdr b/result/cdata-2-byte-UTF-8.xml.rdr new file mode 100644 index 00000000..2eb29403 --- /dev/null +++ b/result/cdata-2-byte-UTF-8.xml.rdr @@ -0,0 +1,15 @@ +0 8 #comment 0 1 This tests that two-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 ČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČ +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 ČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČ +1 15 p 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/cdata-2-byte-UTF-8.xml.sax b/result/cdata-2-byte-UTF-8.xml.sax new file mode 100644 index 00000000..f397f6a7 --- /dev/null +++ b/result/cdata-2-byte-UTF-8.xml.sax @@ -0,0 +1,18 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( This tests that two-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). ) +SAX.startElement(doc) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata(ČČČČČČČČČČ, 1200) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata( ČČČČČČČČČ, 1201) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/cdata-2-byte-UTF-8.xml.sax2 b/result/cdata-2-byte-UTF-8.xml.sax2 new file mode 100644 index 00000000..2da2d500 --- /dev/null +++ b/result/cdata-2-byte-UTF-8.xml.sax2 @@ -0,0 +1,18 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( This tests that two-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). ) +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata(ČČČČČČČČČČ, 1200) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata( ČČČČČČČČČ, 1201) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/result/cdata-3-byte-UTF-8.xml b/result/cdata-3-byte-UTF-8.xml new file mode 100644 index 00000000..b959a127 --- /dev/null +++ b/result/cdata-3-byte-UTF-8.xml @@ -0,0 +1,7 @@ + + + +

+

+

+
diff --git a/result/cdata-3-byte-UTF-8.xml.rde b/result/cdata-3-byte-UTF-8.xml.rde new file mode 100644 index 00000000..3f4d1c59 --- /dev/null +++ b/result/cdata-3-byte-UTF-8.xml.rde @@ -0,0 +1,20 @@ +0 8 #comment 0 1 This tests that three-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛 +1 15 p 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/cdata-3-byte-UTF-8.xml.rdr b/result/cdata-3-byte-UTF-8.xml.rdr new file mode 100644 index 00000000..3f4d1c59 --- /dev/null +++ b/result/cdata-3-byte-UTF-8.xml.rdr @@ -0,0 +1,20 @@ +0 8 #comment 0 1 This tests that three-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛 +1 15 p 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/cdata-3-byte-UTF-8.xml.sax b/result/cdata-3-byte-UTF-8.xml.sax new file mode 100644 index 00000000..2f73e7ca --- /dev/null +++ b/result/cdata-3-byte-UTF-8.xml.sax @@ -0,0 +1,23 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( This tests that three-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). ) +SAX.startElement(doc) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata(牛牛牛牛牛牛, 1200) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata( 牛牛牛牛牛牛, 1201) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata( 牛牛牛牛牛牛, 1202) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/cdata-3-byte-UTF-8.xml.sax2 b/result/cdata-3-byte-UTF-8.xml.sax2 new file mode 100644 index 00000000..3969579c --- /dev/null +++ b/result/cdata-3-byte-UTF-8.xml.sax2 @@ -0,0 +1,23 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( This tests that three-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). ) +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata(牛牛牛牛牛牛, 1200) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata( 牛牛牛牛牛牛, 1201) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata( 牛牛牛牛牛牛, 1202) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/result/cdata-4-byte-UTF-8.xml b/result/cdata-4-byte-UTF-8.xml new file mode 100644 index 00000000..4d1d9a83 --- /dev/null +++ b/result/cdata-4-byte-UTF-8.xml @@ -0,0 +1,8 @@ + + + +

+

+

+

+
diff --git a/result/cdata-4-byte-UTF-8.xml.rde b/result/cdata-4-byte-UTF-8.xml.rde new file mode 100644 index 00000000..437b79e6 --- /dev/null +++ b/result/cdata-4-byte-UTF-8.xml.rde @@ -0,0 +1,25 @@ +0 8 #comment 0 1 This tests that four-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/cdata-4-byte-UTF-8.xml.rdr b/result/cdata-4-byte-UTF-8.xml.rdr new file mode 100644 index 00000000..437b79e6 --- /dev/null +++ b/result/cdata-4-byte-UTF-8.xml.rdr @@ -0,0 +1,25 @@ +0 8 #comment 0 1 This tests that four-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/cdata-4-byte-UTF-8.xml.sax b/result/cdata-4-byte-UTF-8.xml.sax new file mode 100644 index 00000000..d8abcfb5 --- /dev/null +++ b/result/cdata-4-byte-UTF-8.xml.sax @@ -0,0 +1,28 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( This tests that four-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). ) +SAX.startElement(doc) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata(🍦🍦🍦🍦🍦, 1200) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata( 🍦🍦🍦🍦, 1201) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata( 🍦🍦🍦🍦, 1202) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata( 🍦🍦🍦🍦, 1203) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/cdata-4-byte-UTF-8.xml.sax2 b/result/cdata-4-byte-UTF-8.xml.sax2 new file mode 100644 index 00000000..5e07d83e --- /dev/null +++ b/result/cdata-4-byte-UTF-8.xml.sax2 @@ -0,0 +1,28 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( This tests that four-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). ) +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata(🍦🍦🍦🍦🍦, 1200) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata( 🍦🍦🍦🍦, 1201) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata( 🍦🍦🍦🍦, 1202) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata( 🍦🍦🍦🍦, 1203) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/result/noent/cdata-2-byte-UTF-8.xml b/result/noent/cdata-2-byte-UTF-8.xml new file mode 100644 index 00000000..8552efc2 --- /dev/null +++ b/result/noent/cdata-2-byte-UTF-8.xml @@ -0,0 +1,6 @@ + + + +

+

+
diff --git a/result/noent/cdata-3-byte-UTF-8.xml b/result/noent/cdata-3-byte-UTF-8.xml new file mode 100644 index 00000000..b959a127 --- /dev/null +++ b/result/noent/cdata-3-byte-UTF-8.xml @@ -0,0 +1,7 @@ + + + +

+

+

+
diff --git a/result/noent/cdata-4-byte-UTF-8.xml b/result/noent/cdata-4-byte-UTF-8.xml new file mode 100644 index 00000000..4d1d9a83 --- /dev/null +++ b/result/noent/cdata-4-byte-UTF-8.xml @@ -0,0 +1,8 @@ + + + +

+

+

+

+
diff --git a/test/cdata-2-byte-UTF-8.xml b/test/cdata-2-byte-UTF-8.xml new file mode 100644 index 00000000..8552efc2 --- /dev/null +++ b/test/cdata-2-byte-UTF-8.xml @@ -0,0 +1,6 @@ + + + +

+

+
diff --git a/test/cdata-3-byte-UTF-8.xml b/test/cdata-3-byte-UTF-8.xml new file mode 100644 index 00000000..b959a127 --- /dev/null +++ b/test/cdata-3-byte-UTF-8.xml @@ -0,0 +1,7 @@ + + + +

+

+

+
diff --git a/test/cdata-4-byte-UTF-8.xml b/test/cdata-4-byte-UTF-8.xml new file mode 100644 index 00000000..4d1d9a83 --- /dev/null +++ b/test/cdata-4-byte-UTF-8.xml @@ -0,0 +1,8 @@ + + + +

+

+

+

+