1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-01-26 10:03:34 +03:00

add a new regression test program for testing character ranges and UTF8

* Makefile.am testchar.c Makefile.tests README.tests: add a
  new regression test program for testing character ranges and
  UTF8 encoding/decoding
Daniel

svn path=/trunk/; revision=3754
This commit is contained in:
Daniel Veillard 2008-07-24 15:05:38 +00:00
parent bf0324f11d
commit abade01334
4 changed files with 633 additions and 5 deletions

View File

@ -1,3 +1,9 @@
Thu Jul 24 16:57:20 CEST 2008 Daniel Veillard <daniel@veillard.com>
* Makefile.am testchar.c Makefile.tests README.tests: add a
new regression test program for testing character ranges and
UTF8 encoding/decoding
Wed Jul 23 15:32:39 CEST 2008 Daniel Veillard <daniel@veillard.com> Wed Jul 23 15:32:39 CEST 2008 Daniel Veillard <daniel@veillard.com>
* libxml.spec.in: fix the build root * libxml.spec.in: fix the build root

View File

@ -8,12 +8,12 @@ LIBS=`xml2-config --libs`
THREADLIB= -lpthread THREADLIB= -lpthread
EXEEXT= EXEEXT=
all: runtest$(EXEEXT) runsuite$(EXEEXT) testapi$(EXEEXT) all: runtest$(EXEEXT) runsuite$(EXEEXT) testapi$(EXEEXT) testchar$(EXEEXT)
clean: clean:
$(RM) runtest$(EXEEXT) runsuite$(EXEEXT) testapi$(EXEEXT) $(RM) runtest$(EXEEXT) runsuite$(EXEEXT) testapi$(EXEEXT)
check: do_runtest do_runsuite do_testapi check: do_runtest do_testchar do_testapi do_runsuite
runtest$(EXEEXT): runtest.c runtest$(EXEEXT): runtest.c
$(CC) -o runtest$(EXEEXT) $(CFLAGS) runtest.c $(LIBS) $(THREADLIB) $(CC) -o runtest$(EXEEXT) $(CFLAGS) runtest.c $(LIBS) $(THREADLIB)
@ -33,3 +33,9 @@ testapi$(EXEEXT): testapi.c
do_testapi: testapi$(EXEEXT) do_testapi: testapi$(EXEEXT)
./testapi ./testapi
testchar$(EXEEXT): testchar.c
$(CC) -o testchar$(EXEEXT) $(CFLAGS) testchar.c $(LIBS)
do_testchar: testchar$(EXEEXT)
./testchar

View File

@ -10,13 +10,14 @@ program has a different testing purpose:
runtest.c : runs libxml2 basic internal regression tests runtest.c : runs libxml2 basic internal regression tests
runsuite.c: runs libxml2 against external regression tests runsuite.c: runs libxml2 against external regression tests
testapi.c : exercises the library public entry points testapi.c : exercises the library public entry points
testchar.c: exercises the checks of character ranges and UTF-8 validation
The command: The command:
make -f Makefile.tests make -f Makefile.tests check
should be sufficient on an Unix system to build and exercise the tests should be sufficient on an Unix system to build and exercise the tests
for the version of the library installed on the system. Note however for the version of the library installed on the system. Note however
that there isn't backward compatibility provided so if the installed that there isn't backward compatibility provided so if the installed
version is older to the testsuite one, failing to compile or run the tests version is older to the testsuite one, failing to compile or run the tests
is likely. In any event this won't work with an installed libxml2 older is likely. In any event this won't work with an installed libxml2 older
@ -26,4 +27,4 @@ like any other program using libxml2, running the test should be done
simply by launching the resulting executables. simply by launching the resulting executables.
Daniel Veillard Daniel Veillard
Sun Jul 10 2005 Thu Jul 24 2008

615
testchar.c Normal file
View File

@ -0,0 +1,615 @@
/**
* Test the UTF-8 decoding routines
*
* author: Daniel Veillard
* copy: see Copyright for the status of this software.
*/
#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
/*
 * Code of the first error reported to errorHandler() since the value was
 * last reset to 0 by a test; 0 means no error has been recorded.
 */
int lastError;
/**
 * errorHandler:
 * @unused: user data of the callback, the error is ignored unless it is NULL
 * @err: the structured error being reported, may be NULL
 *
 * Structured error callback installed by main(). It remembers the code
 * of the first error seen while lastError is still 0; later errors are
 * dropped until a test resets lastError.
 */
static void errorHandler(void *unused, xmlErrorPtr err) {
    if (unused != NULL)
        return;
    if (err == NULL)
        return;
    /* keep only the first error of the current run */
    if (lastError != 0)
        return;

    lastError = err->code;
}
/*
 * Document templates for the in-document tests. The XXXX run marks the
 * four byte area where test bytes are injected: testDocumentRanges()
 * addresses it at offset 5 in document1 (element content) and at
 * offset 10 in document2 (attribute value).
 */
char document1[100] = "<doc>XXXX</doc>";
char document2[100] = "<doc foo='XXXX'/>";
/**
 * testDocumentRangeByte1:
 * @ctxt: parser context, reset before every parse
 * @document: start of the document buffer handed to the parser
 * @len: number of bytes of @document to parse
 * @data: position inside @document where the tested byte is written
 * @forbid1: additional byte value which must be rejected, -1 for none
 * @forbid2: second additional byte value which must be rejected, -1 for none
 *
 * Writes every value from 0x00 to 0xFF at @data, parses the document
 * with xmlReadMemory() and prints a message on stderr whenever the
 * outcome is not the one expected for that byte.
 */
static void testDocumentRangeByte1(xmlParserCtxtPtr ctxt, char *document,
                  int len,  char *data, int forbid1, int forbid2) {
    int byte;
    xmlDocPtr doc;

    for (byte = 0; byte < 0x100; byte++) {
        /* classification of the byte under test */
        int isForbidden = (byte == forbid1) || (byte == forbid2);
        int isMarkup = (byte == '<') || (byte == '&');
        int isBlank = (byte == 0x9) || (byte == 0xA) || (byte == 0xD);
        int isOutOfRange = ((byte < 0x20) || (byte >= 0x80)) && !isBlank;

        lastError = 0;
        xmlCtxtReset(ctxt);
        data[0] = byte;
        doc = xmlReadMemory(document, len, "test", NULL, 0);

        if (isForbidden) {
            /* caller-specified bytes must produce an error and no document */
            if ((lastError == 0) || (doc != NULL))
                fprintf(stderr,
                    "Failed to detect invalid char for Byte 0x%02X: %c\n",
                        byte, byte);
        } else if (isMarkup) {
            /* a lone '<' or '&' must produce an error and no document */
            if ((lastError == 0) || (doc != NULL))
                fprintf(stderr,
                    "Failed to detect illegal char %c for Byte 0x%02X\n",
                        byte, byte);
        } else if (isOutOfRange) {
            /*
             * control and high bytes: only reported when a document was
             * produced and the error is not XML_ERR_INVALID_CHAR
             */
            if ((lastError != XML_ERR_INVALID_CHAR) && (doc != NULL))
                fprintf(stderr,
                    "Failed to detect invalid char for Byte 0x%02X\n", byte);
        } else if (doc == NULL) {
            /* everything else has to parse */
            fprintf(stderr,
                "Failed to parse valid char for Byte 0x%02X : %c\n",
                    byte, byte);
        }

        if (doc != NULL)
            xmlFreeDoc(doc);
    }
}
/**
 * testDocumentRangeByte2:
 * @ctxt: parser context, reset before every parse
 * @document: start of the document buffer handed to the parser
 * @len: number of bytes of @document to parse
 * @data: position inside @document where the two tested bytes are written
 *
 * Writes every pair made of a first byte in 0x80..0xFF and a second byte
 * in 0x00..0xFF at @data, parses the document with xmlReadMemory() and
 * prints a message on stderr whenever an invalid pair is not rejected or
 * a valid pair is not parsed cleanly.
 */
static void testDocumentRangeByte2(xmlParserCtxtPtr ctxt, char *document,
                  int len,  char *data) {
    int lead, trail;
    xmlDocPtr doc;

    for (lead = 0x80; lead < 0x100; lead++) {
        for (trail = 0; trail < 0x100; trail++) {
            lastError = 0;
            xmlCtxtReset(ctxt);
            data[0] = lead;
            data[1] = trail;
            doc = xmlReadMemory(document, len, "test", NULL, 0);

            /*
             * Three ways for the pair to be malformed, all reported the
             * same way:
             *  - the lead byte has its first bit set but not the second
             *  - the second byte does not start with bits 10
             *  - none of bits 4 to 1 of the lead byte is set, so the
             *    value would have fit in a single byte
             */
            if ((lead & 0x80) &&
                (((lead & 0x40) == 0) ||
                 ((trail & 0xC0) != 0x80) ||
                 ((lead & 0x1E) == 0))) {
                if ((lastError == 0) || (doc != NULL))
                    fprintf(stderr,
                    "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
                            lead, trail);
            }
            /*
             * third bit of the lead byte set: the sequence would need at
             * least 3 bytes but only 2 are provided
             */
            else if ((lead & 0xE0) == 0xE0) {
                if ((lastError == 0) || (doc != NULL))
                    fprintf(stderr,
                "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
                            lead, trail);
            }
            /* any remaining pair must parse without error */
            else if (!((lastError == 0) && (doc != NULL))) {
                fprintf(stderr,
                    "Failed to parse document for Bytes 0x%02X 0x%02X\n",
                        lead, trail);
            }

            if (doc != NULL)
                xmlFreeDoc(doc);
        }
    }
}
/**
 * testDocumentRanges:
 *
 * Exercise UTF-8 character checking while parsing complete documents.
 * Bytes are injected at the first and at the last positions of the four
 * byte area of document1 (element content) and document2 (attribute
 * value), first one byte at a time, then two bytes at a time. The area
 * is refilled with spaces before each of the eight runs.
 */
static void testDocumentRanges(void) {
    xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
    char *area;

    if (ctxt == NULL) {
        fprintf(stderr, "Failed to allocate parser context\n");
        return;
    }

    /*
     * One byte injections
     */
    printf("testing 1 byte char in document: 1");
    fflush(stdout);
    area = &document1[5];
    memset(area, ' ', 4);
    /* element content, first byte of the area */
    testDocumentRangeByte1(ctxt, document1, strlen(document1),
                           area, -1, -1);

    printf(" 2");
    fflush(stdout);
    memset(area, ' ', 4);
    /* element content, last byte of the area */
    testDocumentRangeByte1(ctxt, document1, strlen(document1),
                           area + 3, -1, -1);

    printf(" 3");
    fflush(stdout);
    area = &document2[10];
    memset(area, ' ', 4);
    /* attribute value, first byte; the quote delimiter is forbidden too */
    testDocumentRangeByte1(ctxt, document2, strlen(document2),
                           area, '\'', -1);

    printf(" 4");
    fflush(stdout);
    memset(area, ' ', 4);
    /* attribute value, last byte; the quote delimiter is forbidden too */
    testDocumentRangeByte1(ctxt, document2, strlen(document2),
                           area + 3, '\'', -1);
    printf(" done\n");

    /*
     * Two byte injections
     */
    printf("testing 2 byte char in document: 1");
    fflush(stdout);
    area = &document1[5];
    memset(area, ' ', 4);
    /* element content, first two bytes of the area */
    testDocumentRangeByte2(ctxt, document1, strlen(document1),
                           area);

    printf(" 2");
    fflush(stdout);
    memset(area, ' ', 4);
    /* element content, last two bytes of the area */
    testDocumentRangeByte2(ctxt, document1, strlen(document1),
                           area + 2);

    printf(" 3");
    fflush(stdout);
    area = &document2[10];
    memset(area, ' ', 4);
    /* attribute value, first two bytes of the area */
    testDocumentRangeByte2(ctxt, document2, strlen(document2),
                           area);

    printf(" 4");
    fflush(stdout);
    memset(area, ' ', 4);
    /* attribute value, last two bytes of the area */
    testDocumentRangeByte2(ctxt, document2, strlen(document2),
                           area + 2);
    printf(" done\n");

    xmlFreeParserCtxt(ctxt);
}
/**
 * testCharRangeByte1:
 * @ctxt: parser context whose current input reads from @data
 * @data: the buffer backing that input, at least 4 bytes
 *
 * Stores every value from 0x00 to 0xFF in data[0], with the next three
 * bytes zeroed, and decodes it with xmlCurrentChar(). Byte 0 and bytes
 * from 0x80 up must raise XML_ERR_INVALID_CHAR, 0xD must be returned as
 * 0xA, any other byte must be returned unchanged with a length of 1.
 * Mismatches are reported on stderr.
 */
static void testCharRangeByte1(xmlParserCtxtPtr ctxt, char *data) {
    int byte;
    int consumed, decoded;

    data[1] = 0;
    data[2] = 0;
    data[3] = 0;

    for (byte = 0; byte < 0x100; byte++) {
        data[0] = byte;
        ctxt->charset = XML_CHAR_ENCODING_UTF8;

        lastError = 0;
        decoded = xmlCurrentChar(ctxt, &consumed);

        if ((byte == 0) || (byte >= 0x80)) {
            /* an error is mandatory for those */
            if (lastError != XML_ERR_INVALID_CHAR)
                fprintf(stderr,
                    "Failed to detect invalid char for Byte 0x%02X\n", byte);
            continue;
        }

        if (byte == 0xD) {
            /* carriage return is expected to come back as line feed */
            if ((decoded != 0xA) || (consumed != 1))
                fprintf(stderr, "Failed to convert char for Byte 0x%02X\n",
                        byte);
            continue;
        }

        if ((decoded != byte) || (consumed != 1))
            fprintf(stderr, "Failed to parse char for Byte 0x%02X\n", byte);
    }
}
/**
 * testCharRangeByte2:
 * @ctxt: parser context whose current input reads from @data
 * @data: the buffer backing that input, at least 4 bytes
 *
 * Stores every pair made of a first byte in 0x80..0xFF and a second byte
 * in 0x00..0xFF in data[0] and data[1], with the next two bytes zeroed,
 * and decodes it with xmlCurrentChar(). Malformed pairs must raise
 * XML_ERR_INVALID_CHAR; well formed ones must decode without error to
 * the expected value with a length of 2. Mismatches are reported on
 * stderr.
 */
static void testCharRangeByte2(xmlParserCtxtPtr ctxt, char *data) {
    int lead, trail;
    int consumed, decoded;
    int expected;

    data[2] = 0;
    data[3] = 0;

    for (lead = 0x80; lead < 0x100; lead++) {
        for (trail = 0; trail < 0x100; trail++) {
            data[0] = lead;
            data[1] = trail;
            ctxt->charset = XML_CHAR_ENCODING_UTF8;

            /* value carried by the pair if it is a valid 2 byte sequence */
            expected = (trail & 0x3F) + ((lead & 0x1F) << 6);

            lastError = 0;
            decoded = xmlCurrentChar(ctxt, &consumed);

            /* first bit of the lead byte set but not the second one */
            if ((lead & 0x80) && ((lead & 0x40) == 0)) {
                if (lastError != XML_ERR_INVALID_CHAR)
                    fprintf(stderr,
                    "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
                            lead, trail);
                continue;
            }

            /*
             * the second byte does not start with bits 10, or none of
             * bits 4 to 1 of the lead byte is set so the value would have
             * fit in a single byte
             */
            if ((lead & 0x80) &&
                (((trail & 0xC0) != 0x80) || ((lead & 0x1E) == 0))) {
                if (lastError != XML_ERR_INVALID_CHAR)
                    fprintf(stderr,
                "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
                            lead, trail, decoded);
                continue;
            }

            /*
             * third bit of the lead byte set: the sequence would need at
             * least 3 bytes but only 2 are provided
             */
            if ((lead & 0xE0) == 0xE0) {
                if (lastError != XML_ERR_INVALID_CHAR)
                    fprintf(stderr,
                "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
                            lead, trail);
                continue;
            }

            /* from here on the pair is valid: no error, 2 bytes consumed */
            if ((lastError != 0) || (consumed != 2)) {
                fprintf(stderr,
                    "Failed to parse char for Bytes 0x%02X 0x%02X\n",
                        lead, trail);
                continue;
            }

            /* and the decoded value has to match */
            if (decoded != expected) {
                fprintf(stderr,
            "Failed to parse char for Bytes 0x%02X 0x%02X: expect %d got %d\n",
                        lead, trail, expected, decoded);
            }
        }
    }
}
/**
 * testCharRangeByte3:
 * @ctxt: parser context whose current input reads from @data
 * @data: the buffer backing that input, at least 4 bytes
 *
 * Stores 3 byte sequences in data[0..2], with data[3] zeroed, and decodes
 * them with xmlCurrentChar(). The first byte covers 0xE0..0xFF, the
 * second byte 0x00..0xFF and the third byte is taken from a small sample
 * of values. Malformed, overlong or XML-1.0 forbidden sequences must
 * raise XML_ERR_INVALID_CHAR; the others must decode without error to
 * the expected value with a length of 3. Mismatches are reported on
 * stderr.
 */
static void testCharRangeByte3(xmlParserCtxtPtr ctxt, char *data) {
    int i, j, k, K;
    int len, c;
    /*
     * third byte samples: 0x80, 0x81 and 0xBF start with bits 10 as a
     * continuation byte must, 0, 0xC1 and 0xFF do not
     */
    unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
    int value;

    data[3] = 0;
    for (i = 0xE0;i <= 0xFF;i++) {
        for (j = 0;j <= 0xFF;j++) {
            for (k = 0;k < 6;k++) {
                data[0] = i;
                data[1] = j;
                K = lows[k];
                data[2] = (char) K;
                /* code point carried if the 3 bytes form a valid sequence */
                value = (K & 0x3F) + ((j & 0x3F) << 6) + ((i & 0xF) << 12);
                ctxt->charset = XML_CHAR_ENCODING_UTF8;

                lastError = 0;
                c = xmlCurrentChar(ctxt, &len);

                /*
                 * if fourth bit of first char is set, then the sequence
                 * would need at least 4 bytes, but we give only 3 !
                 */
                if ((i & 0xF0) == 0xF0) {
                    /*
                     * data[] is plain char: go through unsigned char so
                     * that the byte is never sign-extended by %02X
                     */
                    if (lastError != XML_ERR_INVALID_CHAR)
                        fprintf(stderr,
        "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                                i, j, K, (unsigned char) data[3]);
                }

                /*
                 * The second and the third bytes must start with 10
                 */
                else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80)) {
                    if (lastError != XML_ERR_INVALID_CHAR)
                        fprintf(stderr,
        "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
                                i, j, K);
                }

                /*
                 * if using a 3 byte encoding then the value must be at
                 * least 0x800, i.e. one of bits 3 to 0 of i must be set
                 * or bit 5 (0x20) of j must be set
                 */
                else if (((i & 0xF) == 0) && ((j & 0x20) == 0)) {
                    if (lastError != XML_ERR_INVALID_CHAR)
                        fprintf(stderr,
        "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
                                i, j, K);
                }

                /*
                 * There are values in that range that are not allowed
                 * in XML-1.0
                 */
                else if (((value > 0xD7FF) && (value <0xE000)) ||
                         ((value > 0xFFFD) && (value <0x10000))) {
                    if (lastError != XML_ERR_INVALID_CHAR)
                        fprintf(stderr,
    "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X\n",
                                value, i, j, K);
                }

                /*
                 * We should see no error in remaining cases
                 */
                else if ((lastError != 0) || (len != 3)) {
                    fprintf(stderr,
                        "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
                            i, j, K);
                }

                /*
                 * Finally check the value is right. The third byte is
                 * printed from K: this branch is only reached with a
                 * third byte >= 0x80, which a signed char would print
                 * sign-extended.
                 */
                else if (c != value) {
                    fprintf(stderr,
    "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
                            i, j, K, value, c);
                }
            }
        }
    }
}
/**
 * testCharRangeByte4:
 * @ctxt: parser context whose current input reads from @data
 * @data: the buffer backing that input, at least 5 bytes
 *
 * Stores 4 byte sequences in data[0..3], with data[4] zeroed, and decodes
 * them with xmlCurrentChar(). The first byte covers 0xF0..0xFF, the
 * second byte 0x00..0xFF and the third and fourth bytes are taken from a
 * small sample of values. Malformed, overlong or XML-1.0 forbidden
 * sequences must raise XML_ERR_INVALID_CHAR; the others must decode
 * without error to the expected value with a length of 4. Mismatches are
 * reported on stderr.
 */
static void testCharRangeByte4(xmlParserCtxtPtr ctxt, char *data) {
    int i, j, k, K, l, L;
    int len, c;
    /*
     * third and fourth byte samples: 0x80, 0x81 and 0xBF start with bits
     * 10 as a continuation byte must, 0, 0xC1 and 0xFF do not
     */
    unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
    int value;

    data[4] = 0;
    for (i = 0xF0;i <= 0xFF;i++) {
        for (j = 0;j <= 0xFF;j++) {
            for (k = 0;k < 6;k++) {
                for (l = 0;l < 6;l++) {
                    data[0] = i;
                    data[1] = j;
                    K = lows[k];
                    data[2] = (char) K;
                    L = lows[l];
                    data[3] = (char) L;
                    /* code point carried if the 4 bytes are a valid sequence */
                    value = (L & 0x3F) + ((K & 0x3F) << 6) +
                            ((j & 0x3F) << 12) + ((i & 0x7) << 18);
                    ctxt->charset = XML_CHAR_ENCODING_UTF8;

                    lastError = 0;
                    c = xmlCurrentChar(ctxt, &len);

                    /*
                     * if fifth bit of first char is set, then the sequence
                     * would need at least 5 bytes, but we give only 4 !
                     * The fourth byte is printed from L and not from the
                     * plain char data[3], which would be sign-extended by
                     * %02X where char is signed.
                     */
                    if ((i & 0xF8) == 0xF8) {
                        if (lastError != XML_ERR_INVALID_CHAR)
                            fprintf(stderr,
    "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                                    i, j, K, L);
                    }

                    /*
                     * The second, third and fourth bytes must start with 10
                     */
                    else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80) ||
                             ((L & 0xC0) != 0x80)) {
                        if (lastError != XML_ERR_INVALID_CHAR)
                            fprintf(stderr,
    "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                                    i, j, K, L);
                    }

                    /*
                     * if using a 4 byte encoding then the value must be at
                     * least 0x10000, i.e. one of bits 2 to 0 of i must be
                     * set or bit 5 or bit 4 (0x30) of j must be set
                     */
                    else if (((i & 0x7) == 0) && ((j & 0x30) == 0)) {
                        if (lastError != XML_ERR_INVALID_CHAR)
                            fprintf(stderr,
    "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                                    i, j, K, L);
                    }

                    /*
                     * There are values in that range that are not allowed
                     * in XML-1.0
                     */
                    else if (((value > 0xD7FF) && (value <0xE000)) ||
                             ((value > 0xFFFD) && (value <0x10000)) ||
                             (value > 0x10FFFF)) {
                        if (lastError != XML_ERR_INVALID_CHAR)
                            fprintf(stderr,
"Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                                    value, i, j, K, L);
                    }

                    /*
                     * We should see no error in remaining cases; all four
                     * tested bytes are reported
                     */
                    else if ((lastError != 0) || (len != 4)) {
                        fprintf(stderr,
            "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                                i, j, K, L);
                    }

                    /*
                     * Finally check the value is right
                     */
                    else if (c != value) {
                        fprintf(stderr,
"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
                                i, j, K, L, value, c);
                    }
                }
            }
        }
    }
}
/**
 * testCharRanges:
 *
 * Exercise UTF-8 character decoding in isolation, i.e. by calling the
 * decoder directly on a small buffer instead of parsing a full document.
 * This is cheaper and allows covering the full range of UTF-8 characters
 * accepted by XML-1.0. A parser context is created with a single input
 * reading from a local 5 byte buffer, then the 1, 2, 3 and 4 byte tests
 * are run on it in turn.
 */
static void testCharRanges(void) {
    char data[5];
    xmlParserCtxtPtr ctxt;
    xmlParserInputBufferPtr buf;
    xmlParserInputPtr input;

    memset(data, 0, sizeof(data));

    ctxt = xmlNewParserCtxt();
    if (ctxt == NULL) {
        fprintf(stderr, "Failed to allocate parser context\n");
        return;
    }

    /* wrap the local buffer, no copy and no encoding conversion */
    buf = xmlParserInputBufferCreateStatic(data, sizeof(data),
                                           XML_CHAR_ENCODING_NONE);
    if (buf == NULL) {
        fprintf(stderr, "Failed to allocate input buffer\n");
        xmlFreeParserCtxt(ctxt);
        return;
    }

    input = xmlNewInputStream(ctxt);
    if (input == NULL) {
        /* the buffer is not attached to anything yet, release it here */
        xmlFreeParserInputBuffer(buf);
        xmlFreeParserCtxt(ctxt);
        return;
    }

    /* make the input cover the first 4 bytes of the buffer */
    input->filename = NULL;
    input->buf = buf;
    input->base = buf->buffer->content;
    input->cur = input->base;
    input->end = input->base + 4;
    inputPush(ctxt, input);

    printf("testing char range: 1");
    fflush(stdout);
    testCharRangeByte1(ctxt, data);

    printf(" 2");
    fflush(stdout);
    testCharRangeByte2(ctxt, data);

    printf(" 3");
    fflush(stdout);
    testCharRangeByte3(ctxt, data);

    printf(" 4");
    fflush(stdout);
    testCharRangeByte4(ctxt, data);

    printf(" done\n");
    fflush(stdout);

    xmlFreeParserCtxt(ctxt);
}
/**
 * main:
 *
 * Entry point of the test program: installs the error callback, runs the
 * isolated character tests then the in-document tests, and cleans up the
 * library. Mismatches are only printed by the test functions; the value
 * returned here is always 0.
 */
int main(void) {
    /*
     * Initialize the library and check for potential ABI mismatches
     * between the version the program was compiled for and the shared
     * library actually in use.
     */
    LIBXML_TEST_VERSION

    /* route structured errors to errorHandler so tests can inspect them */
    xmlSetStructuredErrorFunc(NULL, errorHandler);

    /* the tests themselves */
    testCharRanges();
    testDocumentRanges();

    /* release the global state of the XML library */
    xmlCleanupParser();

    /* memory debugging output for regression tests */
    xmlMemoryDump();

    return 0;
}