1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-10-26 20:25:14 +03:00
libxml2/testparser.c
Nick Wellnhofer 221df37529 parser: Support custom charset conversion implementations
Implement xmlCtxtSetCharEncConvImpl. I agree that the name is terrible.
2024-07-01 18:05:40 +02:00

637 lines
15 KiB
C

/*
* testparser.c: Additional parser tests
*
* See Copyright for the status of this software.
*/
#include "libxml.h"
#include <libxml/parser.h>
#include <libxml/uri.h>
#include <libxml/xmlreader.h>
#include <libxml/xmlwriter.h>
#include <libxml/HTMLparser.h>
#include <string.h>
static int
testStandaloneWithEncoding(void) {
xmlDocPtr doc;
const char *str =
"<?xml version=\"1.0\" standalone=\"yes\"?>\n"
"<doc></doc>\n";
int err = 0;
xmlResetLastError();
doc = xmlReadDoc(BAD_CAST str, NULL, "UTF-8", 0);
if (doc == NULL) {
fprintf(stderr, "xmlReadDoc failed\n");
err = 1;
}
xmlFreeDoc(doc);
return err;
}
static int
testUnsupportedEncoding(void) {
xmlDocPtr doc;
const xmlError *error;
int err = 0;
xmlResetLastError();
doc = xmlReadDoc(BAD_CAST "<doc/>", NULL, "#unsupported",
XML_PARSE_NOWARNING);
if (doc == NULL) {
fprintf(stderr, "xmlReadDoc failed with unsupported encoding\n");
err = 1;
}
xmlFreeDoc(doc);
error = xmlGetLastError();
if (error == NULL ||
error->code != XML_ERR_UNSUPPORTED_ENCODING ||
error->level != XML_ERR_WARNING ||
strcmp(error->message, "Unsupported encoding: #unsupported\n") != 0)
{
fprintf(stderr, "xmlReadDoc failed to raise correct error\n");
err = 1;
}
return err;
}
static int
testNodeGetContent(void) {
xmlDocPtr doc;
xmlChar *content;
int err = 0;
doc = xmlReadDoc(BAD_CAST "<doc/>", NULL, NULL, 0);
xmlAddChild(doc->children, xmlNewReference(doc, BAD_CAST "lt"));
content = xmlNodeGetContent((xmlNodePtr) doc);
if (strcmp((char *) content, "<") != 0) {
fprintf(stderr, "xmlNodeGetContent failed\n");
err = 1;
}
xmlFree(content);
xmlFreeDoc(doc);
return err;
}
#ifdef LIBXML_SAX1_ENABLED
static int
testBalancedChunk(void) {
xmlNodePtr list;
xmlNodePtr elem;
int ret;
int err = 0;
ret = xmlParseBalancedChunkMemory(NULL, NULL, NULL, 0,
BAD_CAST "start <node xml:lang='en'>abc</node> end", &list);
if ((ret != XML_ERR_OK) ||
(list == NULL) ||
((elem = list->next) == NULL) ||
(elem->type != XML_ELEMENT_NODE) ||
(elem->nsDef == NULL) ||
(!xmlStrEqual(elem->nsDef->href, XML_XML_NAMESPACE))) {
fprintf(stderr, "xmlParseBalancedChunkMemory failed\n");
err = 1;
}
xmlFreeNodeList(list);
return(err);
}
#endif
#ifdef LIBXML_PUSH_ENABLED
static int
testHugePush(void) {
xmlParserCtxtPtr ctxt;
int err, i;
ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
/*
* Push parse a document larger than XML_MAX_LOOKUP_LIMIT
* (10,000,000 bytes). This mainly tests whether shrinking the
* buffer works when push parsing.
*/
xmlParseChunk(ctxt, "<doc>", 5, 0);
for (i = 0; i < 1000000; i++)
xmlParseChunk(ctxt, "<elem>text</elem>", 17, 0);
xmlParseChunk(ctxt, "</doc>", 6, 1);
err = ctxt->wellFormed ? 0 : 1;
xmlFreeDoc(ctxt->myDoc);
xmlFreeParserCtxt(ctxt);
return err;
}
static int
testHugeEncodedChunk(void) {
xmlBufferPtr buf;
xmlChar *chunk;
xmlParserCtxtPtr ctxt;
int err, i;
/*
* Test the push parser with a built-in encoding handler like ISO-8859-1
* and a chunk larger than the initial decoded buffer (currently 4 KB).
*/
buf = xmlBufferCreate();
xmlBufferCat(buf,
BAD_CAST "<?xml version='1.0' encoding='ISO-8859-1'?>\n");
xmlBufferCat(buf, BAD_CAST "<doc><!-- ");
for (i = 0; i < 2000; i++)
xmlBufferCat(buf, BAD_CAST "0123456789");
xmlBufferCat(buf, BAD_CAST " --></doc>");
chunk = xmlBufferDetach(buf);
xmlBufferFree(buf);
ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
xmlParseChunk(ctxt, (char *) chunk, xmlStrlen(chunk), 0);
xmlParseChunk(ctxt, NULL, 0, 1);
err = ctxt->wellFormed ? 0 : 1;
xmlFreeDoc(ctxt->myDoc);
xmlFreeParserCtxt(ctxt);
xmlFree(chunk);
return err;
}
#ifdef LIBXML_HTML_ENABLED
static int
testHtmlPushWithEncoding(void) {
htmlParserCtxtPtr ctxt;
htmlDocPtr doc;
htmlNodePtr node;
int err = 0;
ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
XML_CHAR_ENCODING_UTF8);
htmlParseChunk(ctxt, "-\xC3\xA4-", 4, 1);
doc = ctxt->myDoc;
if (!xmlStrEqual(doc->encoding, BAD_CAST "UTF-8")) {
fprintf(stderr, "testHtmlPushWithEncoding failed\n");
err = 1;
}
node = xmlDocGetRootElement(doc)->children->children->children;
if (!xmlStrEqual(node->content, BAD_CAST "-\xC3\xA4-")) {
fprintf(stderr, "testHtmlPushWithEncoding failed\n");
err = 1;
}
xmlFreeDoc(doc);
htmlFreeParserCtxt(ctxt);
return err;
}
#endif
#endif
#ifdef LIBXML_READER_ENABLED
static int
testReaderEncoding(void) {
xmlBuffer *buf;
xmlTextReader *reader;
xmlChar *xml;
const xmlChar *encoding;
int err = 0;
int i;
buf = xmlBufferCreate();
xmlBufferCCat(buf, "<?xml version='1.0' encoding='ISO-8859-1'?>\n");
xmlBufferCCat(buf, "<doc>");
for (i = 0; i < 8192; i++)
xmlBufferCCat(buf, "x");
xmlBufferCCat(buf, "</doc>");
xml = xmlBufferDetach(buf);
xmlBufferFree(buf);
reader = xmlReaderForDoc(BAD_CAST xml, NULL, NULL, 0);
xmlTextReaderRead(reader);
encoding = xmlTextReaderConstEncoding(reader);
if (!xmlStrEqual(encoding, BAD_CAST "ISO-8859-1")) {
fprintf(stderr, "testReaderEncoding failed\n");
err = 1;
}
xmlFreeTextReader(reader);
xmlFree(xml);
return err;
}
static int
testReaderContent(void) {
xmlTextReader *reader;
const xmlChar *xml = BAD_CAST "<d>x<e>y</e><f>z</f></d>";
xmlChar *string;
int err = 0;
reader = xmlReaderForDoc(xml, NULL, NULL, 0);
xmlTextReaderRead(reader);
string = xmlTextReaderReadOuterXml(reader);
if (!xmlStrEqual(string, xml)) {
fprintf(stderr, "xmlTextReaderReadOuterXml failed\n");
err = 1;
}
xmlFree(string);
string = xmlTextReaderReadInnerXml(reader);
if (!xmlStrEqual(string, BAD_CAST "x<e>y</e><f>z</f>")) {
fprintf(stderr, "xmlTextReaderReadInnerXml failed\n");
err = 1;
}
xmlFree(string);
string = xmlTextReaderReadString(reader);
if (!xmlStrEqual(string, BAD_CAST "xyz")) {
fprintf(stderr, "xmlTextReaderReadString failed\n");
err = 1;
}
xmlFree(string);
xmlFreeTextReader(reader);
return err;
}
#ifdef LIBXML_XINCLUDE_ENABLED
typedef struct {
char *message;
int code;
} testReaderErrorCtxt;
static void
testReaderError(void *arg, const char *msg,
xmlParserSeverities severity ATTRIBUTE_UNUSED,
xmlTextReaderLocatorPtr locator ATTRIBUTE_UNUSED) {
testReaderErrorCtxt *ctxt = arg;
if (ctxt->message != NULL)
xmlFree(ctxt->message);
ctxt->message = xmlMemStrdup(msg);
}
static void
testStructuredReaderError(void *arg, const xmlError *error) {
testReaderErrorCtxt *ctxt = arg;
if (ctxt->message != NULL)
xmlFree(ctxt->message);
ctxt->message = xmlMemStrdup(error->message);
ctxt->code = error->code;
}
static int
testReaderXIncludeError(void) {
/*
* Test whether XInclude errors are reported to the custom error
* handler of a reader.
*/
const char *doc =
"<doc xmlns:xi='http://www.w3.org/2001/XInclude'>\n"
" <xi:include/>\n"
"</doc>\n";
xmlTextReader *reader;
testReaderErrorCtxt errorCtxt;
int err = 0;
reader = xmlReaderForDoc(BAD_CAST doc, NULL, NULL, XML_PARSE_XINCLUDE);
xmlTextReaderSetErrorHandler(reader, testReaderError, &errorCtxt);
errorCtxt.message = NULL;
errorCtxt.code = 0;
while (xmlTextReaderRead(reader) > 0)
;
if (errorCtxt.message == NULL ||
strstr(errorCtxt.message, "href or xpointer") == NULL) {
fprintf(stderr, "xmlTextReaderSetErrorHandler failed\n");
err = 1;
}
xmlFree(errorCtxt.message);
xmlFreeTextReader(reader);
reader = xmlReaderForDoc(BAD_CAST doc, NULL, NULL, XML_PARSE_XINCLUDE);
xmlTextReaderSetStructuredErrorHandler(reader, testStructuredReaderError,
&errorCtxt);
errorCtxt.message = NULL;
errorCtxt.code = 0;
while (xmlTextReaderRead(reader) > 0)
;
if (errorCtxt.code != XML_XINCLUDE_NO_HREF ||
errorCtxt.message == NULL ||
strstr(errorCtxt.message, "href or xpointer") == NULL) {
fprintf(stderr, "xmlTextReaderSetStructuredErrorHandler failed\n");
err = 1;
}
xmlFree(errorCtxt.message);
xmlFreeTextReader(reader);
return err;
}
#endif
#endif
#ifdef LIBXML_WRITER_ENABLED
static int
testWriterIOWrite(void *ctxt, const char *data, int len) {
(void) ctxt;
(void) data;
return len;
}
static int
testWriterIOClose(void *ctxt) {
(void) ctxt;
return XML_IO_ENAMETOOLONG;
}
static int
testWriterClose(void){
xmlOutputBufferPtr out;
xmlTextWriterPtr writer;
int err = 0;
int result;
out = xmlOutputBufferCreateIO(testWriterIOWrite, testWriterIOClose,
NULL, NULL);
writer = xmlNewTextWriter(out);
xmlTextWriterStartDocument(writer, "1.0", "UTF-8", NULL);
xmlTextWriterStartElement(writer, BAD_CAST "elem");
xmlTextWriterEndElement(writer);
xmlTextWriterEndDocument(writer);
result = xmlTextWriterClose(writer);
if (result != XML_IO_ENAMETOOLONG) {
fprintf(stderr, "xmlTextWriterClose reported wrong error %d\n",
result);
err = 1;
}
xmlFreeTextWriter(writer);
return err;
}
#endif
typedef struct {
const char *uri;
const char *base;
const char *result;
} xmlRelativeUriTest;
static int
testBuildRelativeUri(void) {
xmlChar *res;
int err = 0;
int i;
static const xmlRelativeUriTest tests[] = {
{
"/a/b1/c1",
"/a/b2/c2",
"../b1/c1"
}, {
"a/b1/c1",
"a/b2/c2",
"../b1/c1"
}, {
"a/././b1/x/y/../z/../.././c1",
"./a/./b2/././b2",
"../b1/c1"
}, {
"file:///a/b1/c1",
"/a/b2/c2",
NULL
}, {
"/a/b1/c1",
"file:///a/b2/c2",
NULL
}, {
"a/b1/c1",
"/a/b2/c2",
NULL
}, {
"/a/b1/c1",
"a/b2/c2",
NULL
}, {
"http://example.org/a/b1/c1",
"http://example.org/a/b2/c2",
"../b1/c1"
}, {
"http://example.org/a/b1/c1",
"https://example.org/a/b2/c2",
NULL
}, {
"http://example.org/a/b1/c1",
"http://localhost/a/b2/c2",
NULL
}, {
"with space/x x/y y",
"with space/b2/c2",
"../x%20x/y%20y"
}, {
"with space/x x/y y",
"/b2/c2",
"with%20space/x%20x/y%20y"
}
#if defined(_WIN32) || defined(__CYGWIN__)
, {
"\\a\\b1\\c1",
"\\a\\b2\\c2",
"../b1/c1"
}, {
"\\a\\b1\\c1",
"/a/b2/c2",
"../b1/c1"
}, {
"a\\b1\\c1",
"a/b2/c2",
"../b1/c1"
}, {
"file://server/a/b1/c1",
"\\\\?\\UNC\\server\\a\\b2\\c2",
"../b1/c1"
}, {
"file://server/a/b1/c1",
"\\\\server\\a\\b2\\c2",
"../b1/c1"
}, {
"file:///x:/a/b1/c1",
"x:\\a\\b2\\c2",
"../b1/c1"
}, {
"file:///x:/a/b1/c1",
"\\\\?\\x:\\a\\b2\\c2",
"../b1/c1"
}, {
"file:///x:/a/b1/c1",
"file:///y:/a/b2/c2",
NULL
}, {
"x:/a/b1/c1",
"y:/a/b2/c2",
"file:///x:/a/b1/c1"
}, {
"/a/b1/c1",
"y:/a/b2/c2",
NULL
}, {
"\\\\server\\a\\b1\\c1",
"a/b2/c2",
"file://server/a/b1/c1"
}
#endif
};
for (i = 0; (size_t) i < sizeof(tests) / sizeof(tests[0]); i++) {
const xmlRelativeUriTest *test = tests + i;
const char *expect;
res = xmlBuildRelativeURI(BAD_CAST test->uri, BAD_CAST test->base);
expect = test->result ? test->result : test->uri;
if (!xmlStrEqual(res, BAD_CAST expect)) {
fprintf(stderr, "xmlBuildRelativeURI failed uri=%s base=%s "
"result=%s expected=%s\n", test->uri, test->base,
res, expect);
err = 1;
}
xmlFree(res);
}
return err;
}
static int charEncConvImplError;
static int
rot13Convert(void *vctxt, unsigned char *out, int *outlen,
const unsigned char *in, int *inlen) {
int *ctxt = vctxt;
int inSize = *inlen;
int outSize = *outlen;
int rot, i;
rot = *ctxt;
for (i = 0; i < inSize && i < outSize; i++) {
int c = in[i];
if (c >= 'A' && c <= 'Z')
c = 'A' + (c - 'A' + rot) % 26;
else if (c >= 'a' && c <= 'z')
c = 'a' + (c - 'a' + rot) % 26;
out[i] = c;
}
*inlen = i;
*outlen = i;
return XML_ENC_ERR_SUCCESS;
}
static void
rot13ConvCtxtDtor(void *vctxt) {
xmlFree(vctxt);
}
static int
rot13ConvImpl(void *vctxt ATTRIBUTE_UNUSED, const char *name,
xmlCharEncConverter *conv) {
int *inputCtxt;
if (strcmp(name, "rot13") != 0) {
fprintf(stderr, "rot13ConvImpl received wrong name\n");
charEncConvImplError = 1;
return XML_ERR_UNSUPPORTED_ENCODING;
}
conv->convert = rot13Convert;
conv->ctxtDtor = rot13ConvCtxtDtor;
inputCtxt = xmlMalloc(sizeof(*inputCtxt));
*inputCtxt = 13;
conv->inputCtxt = inputCtxt;
return XML_ERR_OK;
}
static int
testCharEncConvImpl(void) {
xmlParserCtxtPtr ctxt;
xmlDocPtr doc;
xmlNodePtr root;
int err = 0;
ctxt = xmlNewParserCtxt();
xmlCtxtSetCharEncConvImpl(ctxt, rot13ConvImpl, NULL);
charEncConvImplError = 0;
doc = xmlCtxtReadDoc(ctxt, BAD_CAST "<?kzy irefvba='1.0'?><qbp/>", NULL,
"rot13", 0);
if (charEncConvImplError)
err = 1;
xmlFreeParserCtxt(ctxt);
root = xmlDocGetRootElement(doc);
if (root == NULL || strcmp((char *) root->name, "doc") != 0) {
fprintf(stderr, "testCharEncConvImpl failed\n");
err = 1;
}
xmlFreeDoc(doc);
return err;
}
int
main(void) {
int err = 0;
err |= testStandaloneWithEncoding();
err |= testUnsupportedEncoding();
err |= testNodeGetContent();
#ifdef LIBXML_SAX1_ENABLED
err |= testBalancedChunk();
#endif
#ifdef LIBXML_PUSH_ENABLED
err |= testHugePush();
err |= testHugeEncodedChunk();
#ifdef LIBXML_HTML_ENABLED
err |= testHtmlPushWithEncoding();
#endif
#endif
#ifdef LIBXML_READER_ENABLED
err |= testReaderEncoding();
err |= testReaderContent();
#ifdef LIBXML_XINCLUDE_ENABLED
err |= testReaderXIncludeError();
#endif
#endif
#ifdef LIBXML_WRITER_ENABLED
err |= testWriterClose();
#endif
err |= testBuildRelativeUri();
err |= testCharEncConvImpl();
return err;
}