1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-10-26 12:25:09 +03:00

test: Add test for push parser boundaries

This commit is contained in:
Nick Wellnhofer 2022-11-20 15:36:41 +01:00
parent 691a771956
commit 9c63cea5a6
9 changed files with 489 additions and 0 deletions

15
result/boundaries1.xml Normal file
View File

@ -0,0 +1,15 @@
<?xml version="1.0"?>
<!DOCTYPE d [
<!ENTITY a "]>">
<!ENTITY b "]>">
<!--> ]> -->]>
<?pi p1?>
<!--> c1 -->
<d a="&gt;" b="&gt;">
text&a;text
<![CDATA[cdata]]>
<?pi p2?>
<!--> c2 -->
</d>
<?pi p3?>
<!--> c3 -->

View File

@ -0,0 +1,19 @@
0 10 d 0 0
0 7 pi 0 1 p1
0 8 #comment 0 1 > c1
0 1 d 0 0
1 3 #text 0 1
text]>text
1 4 #cdata-section 0 1 cdata
1 14 #text 0 1
1 7 pi 0 1 p2
1 14 #text 0 1
1 8 #comment 0 1 > c2
1 14 #text 0 1
0 15 d 0 0
0 7 pi 0 1 p3
0 8 #comment 0 1 > c3

View File

@ -0,0 +1,21 @@
0 10 d 0 0
0 7 pi 0 1 p1
0 8 #comment 0 1 > c1
0 1 d 0 0
1 3 #text 0 1
text
1 5 a 0 0
1 3 #text 0 1 text
1 4 #cdata-section 0 1 cdata
1 14 #text 0 1
1 7 pi 0 1 p2
1 14 #text 0 1
1 8 #comment 0 1 > c2
1 14 #text 0 1
0 15 d 0 0
0 7 pi 0 1 p3
0 8 #comment 0 1 > c3

View File

@ -0,0 +1,32 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.internalSubset(d, , )
SAX.entityDecl(a, 1, (null), (null), ]>)
SAX.getEntity(a)
SAX.entityDecl(b, 1, (null), (null), ]>)
SAX.getEntity(b)
SAX.comment(> ]> )
SAX.externalSubset(d, , )
SAX.processingInstruction(pi, p1)
SAX.comment(> c1 )
SAX.startElement(d, a='>', b='>')
SAX.characters(
text, 5)
SAX.getEntity(a)
SAX.characters(]>, 2)
SAX.reference(a)
SAX.characters(text
, 5)
SAX.pcdata(cdata, 5)
SAX.characters(
, 1)
SAX.processingInstruction(pi, p2)
SAX.characters(
, 1)
SAX.comment(> c2 )
SAX.characters(
, 1)
SAX.endElement(d)
SAX.processingInstruction(pi, p3)
SAX.comment(> c3 )
SAX.endDocument()

View File

@ -0,0 +1,33 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.internalSubset(d, , )
SAX.entityDecl(a, 1, (null), (null), ]>)
SAX.getEntity(a)
SAX.entityDecl(b, 1, (null), (null), ]>)
SAX.getEntity(b)
SAX.comment(> ]> )
SAX.externalSubset(d, , )
SAX.processingInstruction(pi, p1)
SAX.comment(> c1 )
SAX.startElementNs(d, NULL, NULL, 0, 2, 0, a='>" b...', 1, b='>'>
...', 1)
SAX.characters(
text, 5)
SAX.getEntity(a)
SAX.characters(]>, 2)
SAX.reference(a)
SAX.characters(text
, 5)
SAX.pcdata(cdata, 5)
SAX.characters(
, 1)
SAX.processingInstruction(pi, p2)
SAX.characters(
, 1)
SAX.comment(> c2 )
SAX.characters(
, 1)
SAX.endElementNs(d, NULL, NULL)
SAX.processingInstruction(pi, p3)
SAX.comment(> c3 )
SAX.endDocument()

View File

@ -0,0 +1,15 @@
<?xml version="1.0"?>
<!DOCTYPE d [
<!ENTITY a "]>">
<!ENTITY b "]>">
<!--> ]> -->]>
<?pi p1?>
<!--> c1 -->
<d a="&gt;" b="&gt;">
text]&gt;text
<![CDATA[cdata]]>
<?pi p2?>
<!--> c2 -->
</d>
<?pi p3?>
<!--> c3 -->

View File

@ -0,0 +1,32 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.internalSubset(d, , )
SAX.entityDecl(a, 1, (null), (null), ]>)
SAX.getEntity(a)
SAX.entityDecl(b, 1, (null), (null), ]>)
SAX.getEntity(b)
SAX.comment(> ]> )
SAX.externalSubset(d, , )
SAX.processingInstruction(pi, p1)
SAX.comment(> c1 )
SAX.startElementNs(d, NULL, NULL, 0, 2, 0, a='>" b...', 1, b='>'>
...', 1)
SAX.characters(
text, 5)
SAX.getEntity(a)
SAX.characters(]>, 2)
SAX.characters(text
, 5)
SAX.pcdata(cdata, 5)
SAX.characters(
, 1)
SAX.processingInstruction(pi, p2)
SAX.characters(
, 1)
SAX.comment(> c2 )
SAX.characters(
, 1)
SAX.endElementNs(d, NULL, NULL)
SAX.processingInstruction(pi, p3)
SAX.comment(> c3 )
SAX.endDocument()

306
runtest.c
View File

@ -1970,6 +1970,306 @@ pushParseTest(const char *filename, const char *result,
}
return(0);
}
static int pushBoundaryCount;
static int pushBoundaryRefCount;
static int pushBoundaryCharsCount;
static int pushBoundaryCDataCount;
static void
internalSubsetBnd(void *ctx, const xmlChar *name, const xmlChar *externalID,
const xmlChar *systemID) {
pushBoundaryCount++;
xmlSAX2InternalSubset(ctx, name, externalID, systemID);
}
static void
referenceBnd(void *ctx, const xmlChar *name) {
pushBoundaryRefCount++;
xmlSAX2Reference(ctx, name);
}
static void
charactersBnd(void *ctx, const xmlChar *ch, int len) {
pushBoundaryCount++;
pushBoundaryCharsCount++;
xmlSAX2Characters(ctx, ch, len);
}
static void
cdataBlockBnd(void *ctx, const xmlChar *ch, int len) {
pushBoundaryCount++;
pushBoundaryCDataCount++;
xmlSAX2CDataBlock(ctx, ch, len);
}
static void
processingInstructionBnd(void *ctx, const xmlChar *target,
const xmlChar *data) {
pushBoundaryCount++;
xmlSAX2ProcessingInstruction(ctx, target, data);
}
static void
commentBnd(void *ctx, const xmlChar *value) {
xmlParserCtxtPtr ctxt = ctx;
if (ctxt->inSubset == 0)
pushBoundaryCount++;
xmlSAX2Comment(ctx, value);
}
static void
startElementBnd(void *ctx, const xmlChar *xname, const xmlChar **atts) {
const char *name = (const char *)xname;
/* Some elements might be created automatically. */
if ((strcmp(name, "html") != 0) &&
(strcmp(name, "body") != 0) &&
(strcmp(name, "head") != 0) &&
(strcmp(name, "p") != 0)) {
pushBoundaryCount++;
}
xmlSAX2StartElement(ctx, xname, atts);
}
static void
endElementBnd(void *ctx, const xmlChar *name) {
/*pushBoundaryCount++;*/
xmlSAX2EndElement(ctx, name);
}
static void
startElementNsBnd(void *ctx, const xmlChar *localname, const xmlChar *prefix,
const xmlChar *URI, int nb_namespaces,
const xmlChar **namespaces, int nb_attributes,
int nb_defaulted, const xmlChar **attributes) {
pushBoundaryCount++;
xmlSAX2StartElementNs(ctx, localname, prefix, URI, nb_namespaces,
namespaces, nb_attributes, nb_defaulted, attributes);
}
static void
endElementNsBnd(void *ctx, const xmlChar *localname, const xmlChar *prefix,
const xmlChar *URI) {
/*pushBoundaryCount++;*/
xmlSAX2EndElementNs(ctx, localname, prefix, URI);
}
/**
* pushBoundaryTest:
* @filename: the file to parse
* @result: the file with expected result
* @err: the file with error messages: unused
*
* Test whether the push parser detects boundaries between syntactical
* elements correctly.
*
* Returns 0 in case of success, an error code otherwise
*/
static int
pushBoundaryTest(const char *filename, const char *result,
const char *err ATTRIBUTE_UNUSED,
int options) {
xmlParserCtxtPtr ctxt;
xmlDocPtr doc;
xmlSAXHandler bndSAX;
const char *base;
int size, res, numCallbacks;
int cur = 0;
unsigned long avail, oldConsumed, consumed;
/*
* If the parser made progress, check that exactly one construct was
* processed and that the input buffer is (almost) empty.
* Since we use a chunk size of 1, this tests whether content is
* processed as early as possible.
*/
nb_tests++;
memset(&bndSAX, 0, sizeof(bndSAX));
#ifdef LIBXML_HTML_ENABLED
if (options & XML_PARSE_HTML) {
xmlSAX2InitHtmlDefaultSAXHandler(&bndSAX);
bndSAX.startElement = startElementBnd;
bndSAX.endElement = endElementBnd;
} else
#endif
{
xmlSAXVersion(&bndSAX, 2);
bndSAX.startElementNs = startElementNsBnd;
bndSAX.endElementNs = endElementNsBnd;
}
bndSAX.internalSubset = internalSubsetBnd;
bndSAX.reference = referenceBnd;
bndSAX.characters = charactersBnd;
bndSAX.cdataBlock = cdataBlockBnd;
bndSAX.processingInstruction = processingInstructionBnd;
bndSAX.comment = commentBnd;
/*
* load the document in memory and work from there.
*/
if (loadMem(filename, &base, &size) != 0) {
fprintf(stderr, "Failed to load %s\n", filename);
return(-1);
}
#ifdef LIBXML_HTML_ENABLED
if (options & XML_PARSE_HTML)
ctxt = htmlCreatePushParserCtxt(&bndSAX, NULL, base, 1, filename,
XML_CHAR_ENCODING_NONE);
else
#endif
ctxt = xmlCreatePushParserCtxt(&bndSAX, NULL, base, 1, filename);
xmlCtxtUseOptions(ctxt, options);
cur = 1;
consumed = 0;
numCallbacks = 0;
avail = 0;
while ((cur < size) && (numCallbacks <= 1) && (avail <= 0)) {
int terminate = (cur + 1 >= size);
int isText = 0;
if (ctxt->instate == XML_PARSER_CONTENT) {
int firstChar = (ctxt->input->end > ctxt->input->cur) ?
*ctxt->input->cur :
base[cur];
if ((firstChar != '<') &&
((options & XML_PARSE_HTML) || (firstChar != '&')))
isText = 1;
}
oldConsumed = ctxt->input->consumed +
(unsigned long) (ctxt->input->cur - ctxt->input->base);
pushBoundaryCount = 0;
pushBoundaryRefCount = 0;
pushBoundaryCharsCount = 0;
pushBoundaryCDataCount = 0;
#ifdef LIBXML_HTML_ENABLED
if (options & XML_PARSE_HTML)
htmlParseChunk(ctxt, base + cur, 1, terminate);
else
#endif
xmlParseChunk(ctxt, base + cur, 1, terminate);
cur += 1;
/*
* Callback check: Check that only a single construct was parsed.
*/
if (pushBoundaryRefCount > 0) {
numCallbacks = 1;
} else {
numCallbacks = pushBoundaryCount;
if (pushBoundaryCharsCount > 1) {
if (options & XML_PARSE_HTML) {
/*
* The HTML parser can generate a mix of chars and
* references.
*/
numCallbacks -= pushBoundaryCharsCount - 1;
} else {
/*
* Allow two chars callbacks. This can happen when
* multi-byte chars are split across buffer boundaries.
*/
numCallbacks -= 1;
}
}
if (options & XML_PARSE_HTML) {
/*
* Allow multiple cdata callbacks in HTML mode.
*/
if (pushBoundaryCDataCount > 1)
numCallbacks -= pushBoundaryCDataCount - 1;
}
}
/*
* Buffer check: If input was consumed, check that the input
* buffer is (almost) empty.
*/
consumed = ctxt->input->consumed +
(unsigned long) (ctxt->input->cur - ctxt->input->base);
if ((ctxt->instate != XML_PARSER_DTD) &&
(consumed >= 4) &&
(consumed != oldConsumed)) {
size_t max = 0;
avail = ctxt->input->end - ctxt->input->cur;
if ((options & XML_PARSE_HTML) &&
(ctxt->instate == XML_PARSER_END_TAG)) {
/* Something related to script parsing. */
max = 3;
} else if (isText) {
int c = *ctxt->input->cur;
/* 3 bytes for partial UTF-8 */
max = ((c == '<') || (c == '&')) ? 1 : 3;
} else if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
/* 2 bytes for terminator, 3 bytes for UTF-8 */
max = 5;
}
if (avail <= max)
avail = 0;
}
}
doc = ctxt->myDoc;
#ifdef LIBXML_HTML_ENABLED
if (options & XML_PARSE_HTML)
res = 1;
else
#endif
res = ctxt->wellFormed;
xmlFreeParserCtxt(ctxt);
free((char *)base);
if (numCallbacks > 1) {
xmlFreeDoc(doc);
fprintf(stderr, "Failed push boundary callback test (%d@%lu-%lu): %s\n",
numCallbacks, oldConsumed, consumed, filename);
return(-1);
}
if (avail > 0) {
xmlFreeDoc(doc);
fprintf(stderr, "Failed push boundary buffer test (%lu@%lu): %s\n",
avail, consumed, filename);
return(-1);
}
if (!res) {
xmlFreeDoc(doc);
fprintf(stderr, "Failed to parse %s\n", filename);
return(-1);
}
#ifdef LIBXML_HTML_ENABLED
if (options & XML_PARSE_HTML)
htmlDocDumpMemory(doc, (xmlChar **) &base, &size);
else
#endif
xmlDocDumpMemory(doc, (xmlChar **) &base, &size);
xmlFreeDoc(doc);
res = compareFileMem(result, base, size);
if ((base == NULL) || (res != 0)) {
if (base != NULL)
xmlFree((char *)base);
fprintf(stderr, "Result for %s failed in %s\n", filename, result);
return(-1);
}
xmlFree((char *)base);
if (err != NULL) {
res = compareFileMem(err, testErrors, testErrorsSize);
if (res != 0) {
fprintf(stderr, "Error for %s failed\n", filename);
return(-1);
}
}
return(0);
}
#endif
/**
@ -4660,6 +4960,9 @@ testDesc testDescriptions[] = {
{ "XML push regression tests" ,
pushParseTest, "./test/*", "result/", "", NULL,
0 },
{ "XML push boundary tests" ,
pushBoundaryTest, "./test/*", "result/", "", NULL,
0 },
#endif
#ifdef LIBXML_HTML_ENABLED
{ "HTML regression tests" ,
@ -4672,6 +4975,9 @@ testDesc testDescriptions[] = {
{ "Push HTML regression tests" ,
pushParseTest, "./test/HTML/*", "result/HTML/", "", ".err",
XML_PARSE_HTML },
{ "Push HTML boundary tests" ,
pushBoundaryTest, "./test/HTML/*", "result/HTML/", "", NULL,
XML_PARSE_HTML },
#endif
{ "HTML SAX regression tests" ,
saxParseTest, "./test/HTML/*", "result/HTML/", ".sax", NULL,

16
test/boundaries1.xml Normal file
View File

@ -0,0 +1,16 @@
<?xml version="1.0"?>
<!DOCTYPE d [
<!ENTITY a "]>">
<!ENTITY b ']>'>
<!--> ]> -->
] >
<?pi p1?>
<!--> c1 -->
<d a=">" b='>'>
text&a;text
<![CDATA[cdata]]>
<?pi p2?>
<!--> c2 -->
</d>
<?pi p3?>
<!--> c3 -->