1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-03-25 10:50:08 +03:00

fuzz: Start to fuzz XML Schema validator

This commit is contained in:
Nick Wellnhofer 2024-12-10 15:54:12 +01:00
parent 9f86dae989
commit fd359a7e49
5 changed files with 269 additions and 36 deletions

View File

@ -40,6 +40,8 @@ static struct {
/* The first entity is the main entity. */
const char *mainUrl;
xmlFuzzEntityInfo *mainEntity;
const char *secondaryUrl;
xmlFuzzEntityInfo *secondaryEntity;
} fuzzData;
size_t fuzzNumAttempts;
@ -195,6 +197,8 @@ xmlFuzzDataInit(const char *data, size_t size) {
fuzzData.entities = xmlHashCreate(8);
fuzzData.mainUrl = NULL;
fuzzData.mainEntity = NULL;
fuzzData.secondaryUrl = NULL;
fuzzData.secondaryEntity = NULL;
}
/**
@ -390,6 +394,9 @@ xmlFuzzReadEntities(void) {
if (num == 0) {
fuzzData.mainUrl = url;
fuzzData.mainEntity = entityInfo;
} else if (num == 1) {
fuzzData.secondaryUrl = url;
fuzzData.secondaryEntity = entityInfo;
}
num++;
@ -421,6 +428,30 @@ xmlFuzzMainEntity(size_t *size) {
return(fuzzData.mainEntity->data);
}
/**
 * xmlFuzzSecondaryUrl:
 *
 * Accessor for the URL stored in fuzzData.secondaryUrl.
 *
 * Returns the secondary URL, or NULL if none has been recorded.
 */
const char *
xmlFuzzSecondaryUrl(void) {
    const char *url = fuzzData.secondaryUrl;

    return(url);
}
/**
* xmlFuzzSecondaryEntity:
* @size: size of the secondary entity in bytes
*
* Returns the secondary entity.
*/
const char *
xmlFuzzSecondaryEntity(size_t *size) {
if (fuzzData.secondaryEntity == NULL)
return(NULL);
*size = fuzzData.secondaryEntity->size;
return(fuzzData.secondaryEntity->data);
}
/**
* xmlFuzzResourceLoader:
*

View File

@ -112,6 +112,12 @@ xmlFuzzMainUrl(void);
const char *
xmlFuzzMainEntity(size_t *size);
/* Returns the URL of the secondary entity. */
const char *
xmlFuzzSecondaryUrl(void);
/*
 * Returns the data of the secondary entity and stores its size in bytes
 * in *size. Returns NULL, leaving *size untouched, when there is no
 * secondary entity.
 */
const char *
xmlFuzzSecondaryEntity(size_t *size);
int
xmlFuzzResourceLoader(void *data, const char *URL, const char *ID,
xmlResourceType type, int flags, xmlParserInputPtr *out);

View File

@ -86,14 +86,16 @@ fuzzResourceRecorder(void *data ATTRIBUTE_UNUSED, const char *URL,
}
} while (len > 0);
xmlFuzzWriteString(globalData.out, URL);
xmlFuzzWriteString(globalData.out,
(char *) xmlBufContent(in->buf->buffer));
data = xmlStrdup(xmlBufContent(in->buf->buffer));
if (data == NULL) {
fprintf(stderr, "Error allocating entity data\n");
xmlFreeInputStream(in);
return(XML_ERR_NO_MEMORY);
}
xmlFreeInputStream(in);
xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
globalData.entities);
xmlHashAddEntry(globalData.entities, (const xmlChar *) URL, data);
return(xmlNewInputFromUrl(URL, flags, out));
}
@ -105,9 +107,24 @@ fuzzRecorderInit(FILE *out) {
globalData.oldLoader = xmlGetExternalEntityLoader();
}
/*
 * Callback handed to xmlHashRemoveEntry() and xmlHashFree() for the
 * table of recorded entities.
 *
 * @entry: the recorded entity content (a heap-allocated string)
 * @file:  the hash key, i.e. the URL the content was recorded under
 *
 * Writes the URL followed by the content to globalData.out, then
 * releases the content.
 */
static void
fuzzRecorderWriteAndFree(void *entry, const xmlChar *file) {
    const char *url = (const char *) file;
    char *content = entry;

    xmlFuzzWriteString(globalData.out, url);
    xmlFuzzWriteString(globalData.out, content);

    xmlFree(content);
}
/*
 * Removes the entry keyed by @file from globalData.entities, passing
 * fuzzRecorderWriteAndFree as the deallocator so the entry is written
 * to the output and freed. The result of the removal is not checked.
 */
static void
fuzzRecorderWrite(const char *file) {
    const xmlChar *key = (const xmlChar *) file;

    xmlHashRemoveEntry(globalData.entities, key, fuzzRecorderWriteAndFree);
}
static void
fuzzRecorderCleanup(void) {
xmlHashFree(globalData.entities, NULL);
/* Write remaining entities (in random order). */
xmlHashFree(globalData.entities, fuzzRecorderWriteAndFree);
globalData.out = NULL;
globalData.entities = NULL;
globalData.oldLoader = NULL;
@ -174,6 +191,7 @@ processXml(const char *docFile, FILE *out) {
xmlFreeDoc(doc);
xmlFreeParserCtxt(ctxt);
fuzzRecorderWrite(docFile);
fuzzRecorderCleanup();
return(0);
@ -209,32 +227,7 @@ processHtml(const char *docFile, FILE *out) {
}
#endif
#ifdef HAVE_SCHEMA_FUZZER
/*
 * Generates seed data for a single schema file: parses docFile as an
 * XML Schema while fuzzResourceRecorder records the loaded resources
 * into out.
 *
 * NOTE(review): a three-argument processSchema(xsdFile, xmlFile, out)
 * appears further down in this listing; this two-argument version looks
 * like the pre-change definition -- confirm only one of them is compiled.
 *
 * Always returns 0.
 */
static int
processSchema(const char *docFile, FILE *out) {
xmlSchemaPtr schema;
xmlSchemaParserCtxtPtr pctxt;
/* Max allocations. */
xmlFuzzWriteInt(out, 0, 4);
fuzzRecorderInit(out);
pctxt = xmlSchemaNewParserCtxt(docFile);
xmlSchemaSetParserStructuredErrors(pctxt, xmlFuzzSErrorFunc, NULL);
/* Route resource loading through the recorder. */
xmlSchemaSetResourceLoader(pctxt, fuzzResourceRecorder, NULL);
schema = xmlSchemaParse(pctxt);
xmlSchemaFreeParserCtxt(pctxt);
/* The parsed schema is not used any further; it is freed right away. */
xmlSchemaFree(schema);
fuzzRecorderCleanup();
return(0);
}
#endif
#if defined(HAVE_HTML_FUZZER) || \
defined(HAVE_SCHEMA_FUZZER) || \
defined(HAVE_XML_FUZZER)
static int
processPattern(const char *pattern) {
@ -315,6 +308,175 @@ error:
}
#endif
#if defined(HAVE_SCHEMA_FUZZER)
/*
 * Generates one seed for the schema fuzzer from a schema/instance pair.
 *
 * @xsdFile: path of the XML Schema to parse
 * @xmlFile: path of the instance document to validate against it
 * @out:     stream receiving the seed data
 *
 * Parses @xsdFile and, if that yields a schema, reads @xmlFile and
 * validates it, with fuzzResourceRecorder installed as the resource
 * loader for both steps. Afterwards the recorded schema entry is
 * written first, the instance entry second, and the recorder cleanup
 * handles whatever remains.
 *
 * Always returns 0.
 */
static int
processSchema(const char *xsdFile, const char *xmlFile, FILE *out) {
    xmlSchemaParserCtxtPtr schemaParser;
    xmlSchemaPtr compiled;

    /* Max allocations. */
    xmlFuzzWriteInt(out, 0, 4);

    fuzzRecorderInit(out);

    schemaParser = xmlSchemaNewParserCtxt(xsdFile);
    xmlSchemaSetParserStructuredErrors(schemaParser, xmlFuzzSErrorFunc, NULL);
    xmlSchemaSetResourceLoader(schemaParser, fuzzResourceRecorder, NULL);
    compiled = xmlSchemaParse(schemaParser);
    xmlSchemaFreeParserCtxt(schemaParser);

    if (compiled != NULL) {
        xmlParserCtxtPtr docParser;
        xmlSchemaValidCtxtPtr validator;
        xmlDocPtr instance;

        /* Read the instance document through the recording loader. */
        docParser = xmlNewParserCtxt();
        xmlCtxtSetErrorHandler(docParser, xmlFuzzSErrorFunc, NULL);
        xmlCtxtSetResourceLoader(docParser, fuzzResourceRecorder, NULL);
        instance = xmlCtxtReadFile(docParser, xmlFile, NULL, XML_PARSE_NOENT);
        xmlFreeParserCtxt(docParser);

        /* Validate it against the schema parsed above. */
        validator = xmlSchemaNewValidCtxt(compiled);
        xmlSchemaSetValidStructuredErrors(validator, xmlFuzzSErrorFunc, NULL);
        xmlSchemaValidateDoc(validator, instance);
        xmlSchemaFreeValidCtxt(validator);

        xmlFreeDoc(instance);
        xmlSchemaFree(compiled);
    }

    /* Schema entry first, instance entry second. */
    fuzzRecorderWrite(xsdFile);
    fuzzRecorderWrite(xmlFile);
    fuzzRecorderCleanup();

    return(0);
}
/*
 * Generates schema fuzzer seeds for every schema matching a glob pattern.
 *
 * @pattern: glob pattern selecting the schema files
 *
 * For each regular file matched by @pattern, the ".xsd" (or "_N.xsd")
 * suffix is stripped and instance documents are looked up with the
 * pattern "<stem>_*.xml". For each instance found, a seed file is
 * created under "seed/<fuzzer>/" and processSchema() is run from inside
 * the schema's directory with the bare schema and instance file names.
 *
 * Returns 0 on success (including when nothing matches), -1 if any
 * step failed.
 */
static int
processSchemaPattern(const char *pattern) {
    glob_t globbuf;
    int ret = 0;
    int res;
    size_t i;

    res = glob(pattern, 0, NULL, &globbuf);
    if (res == GLOB_NOMATCH)
        return(0);
    if (res != 0) {
        fprintf(stderr, "couldn't match pattern %s\n", pattern);
        /* glob() may have stored partial results before failing. */
        globfree(&globbuf);
        return(-1);
    }

    for (i = 0; i < globbuf.gl_pathc; i++) {
        glob_t globbuf2;
        struct stat statbuf;
        char xmlPattern[PATH_SIZE];
        char *dirBuf = NULL;
        char *baseBuf = NULL;
        const char *path, *dir, *base;
        size_t size, baseLen, len, j;

        path = globbuf.gl_pathv[i];

        if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
            continue;

        /* dirname() and basename() may modify their argument. */
        dirBuf = (char *) xmlCharStrdup(path);
        baseBuf = (char *) xmlCharStrdup(path);
        if ((dirBuf == NULL) || (baseBuf == NULL)) {
            fprintf(stderr, "memory allocation failed\n");
            ret = -1;
            goto next;
        }

        dir = dirname(dirBuf);
        base = basename(baseBuf);
        baseLen = strlen(base);

        len = strlen(path);
        /* Must go through the cleanup label: the buffers are allocated. */
        if (len <= 5)
            goto next;

        /* Strip .xsd or _0.xsd suffix */
        if (len > 6 && path[len - 6] == '_')
            len -= 6;
        else
            len -= 4;

        size = snprintf(xmlPattern, sizeof(xmlPattern), "%.*s_*.xml",
                        (int) len, path);
        if (size >= PATH_SIZE) {
            fprintf(stderr, "creating path failed\n");
            ret = -1;
            goto next;
        }

        res = glob(xmlPattern, 0, NULL, &globbuf2);
        if (res == GLOB_NOMATCH)
            goto next;
        if (res != 0) {
            fprintf(stderr, "couldn't match pattern %s\n", xmlPattern);
            /* glob() may have stored partial results before failing. */
            globfree(&globbuf2);
            ret = -1;
            goto next;
        }

        for (j = 0; j < globbuf2.gl_pathc; j++) {
            char outPath[PATH_SIZE];
            const char *xmlFile, *xmlBase;
            FILE *out = NULL;

            xmlFile = globbuf2.gl_pathv[j];
            len = strlen(xmlFile);

            /*
             * Take the file name from the last path separator instead of
             * skipping strlen(dir) + 1 bytes: dirname() can return "." or
             * "/" or collapse repeated slashes, so its result is not
             * always a literal prefix of the globbed path.
             */
            xmlBase = strrchr(xmlFile, '/');
            xmlBase = (xmlBase != NULL) ? xmlBase + 1 : xmlFile;

            /* Shortest accepted instance name is "_N.xml". */
            if (strlen(xmlBase) < 6)
                continue;

            /* Name the seed after the schema, plus the instance id. */
            if (len >= 6 && xmlFile[len - 6] == '_')
                size = snprintf(outPath, sizeof(outPath), "seed/%s/%.*s_%c",
                                globalData.fuzzer, (int) baseLen - 4, base,
                                xmlFile[len - 5]);
            else
                size = snprintf(outPath, sizeof(outPath), "seed/%s/%.*s",
                                globalData.fuzzer, (int) baseLen - 4, base);

            if (size >= PATH_SIZE) {
                fprintf(stderr, "creating path failed\n");
                ret = -1;
                continue;
            }

            out = fopen(outPath, "wb");
            if (out == NULL) {
                fprintf(stderr, "couldn't open %s for writing\n", outPath);
                ret = -1;
                continue;
            }

            /* Run from the schema's directory, using bare file names. */
            if (chdir(dir) != 0) {
                fprintf(stderr, "couldn't chdir to %s\n", dir);
                ret = -1;
            } else {
                if (processSchema(base, xmlBase, out) != 0)
                    ret = -1;
            }

            fclose(out);

            if (chdir(globalData.cwd) != 0) {
                fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
                ret = -1;
                break;
            }
        }

        globfree(&globbuf2);

    next:
        xmlFree(dirBuf);
        xmlFree(baseBuf);
    }

    globfree(&globbuf);

    return(ret);
}
#endif
#ifdef HAVE_XPATH_FUZZER
static int
processXPath(const char *testDir, const char *prefix, const char *name,
@ -479,8 +641,7 @@ main(int argc, const char **argv) {
#endif
} else if (strcmp(fuzzer, "schema") == 0) {
#ifdef HAVE_SCHEMA_FUZZER
processArg = processPattern;
globalData.processFile = processSchema;
processArg = processSchemaPattern;
#endif
} else if (strcmp(fuzzer, "valid") == 0) {
#ifdef HAVE_VALID_FUZZER

View File

@ -24,9 +24,10 @@ LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
int
LLVMFuzzerTestOneInput(const char *data, size_t size) {
xmlSchemaParserCtxtPtr pctxt;
xmlSchemaPtr schema;
size_t failurePos;
if (size > 50000)
if (size > 200000)
return(0);
failurePos = xmlFuzzReadInt(4) % (size + 100);
@ -38,9 +39,30 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
pctxt = xmlSchemaNewParserCtxt(xmlFuzzMainUrl());
xmlSchemaSetParserStructuredErrors(pctxt, xmlFuzzSErrorFunc, NULL);
xmlSchemaSetResourceLoader(pctxt, xmlFuzzResourceLoader, NULL);
xmlSchemaFree(xmlSchemaParse(pctxt));
schema = xmlSchemaParse(pctxt);
xmlSchemaFreeParserCtxt(pctxt);
if (schema != NULL) {
xmlSchemaValidCtxtPtr vctxt;
xmlParserCtxtPtr ctxt;
xmlDocPtr doc;
ctxt = xmlNewParserCtxt();
xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
xmlCtxtSetResourceLoader(ctxt, xmlFuzzResourceLoader, NULL);
doc = xmlCtxtReadFile(ctxt, xmlFuzzSecondaryUrl(), NULL,
XML_PARSE_NOENT);
xmlFreeParserCtxt(ctxt);
vctxt = xmlSchemaNewValidCtxt(schema);
xmlSchemaSetValidStructuredErrors(vctxt, xmlFuzzSErrorFunc, NULL);
xmlSchemaValidateDoc(vctxt, doc);
xmlSchemaFreeValidCtxt(vctxt);
xmlFreeDoc(doc);
xmlSchemaFree(schema);
}
xmlFuzzInjectFailure(0);
xmlFuzzDataCleanup();
xmlResetLastError();

View File

@ -158,13 +158,26 @@ testEntityLoader(void) {
"<!ENTITY ent SYSTEM \"ent.txt\">\\\n"
"ent.txt\\\n"
"Hello, world!\\\n";
const char *docBuffer;
const char *docBuffer, *url;
size_t docSize;
xmlDocPtr doc;
int ret = 0;
xmlFuzzDataInit(data, sizeof(data) - 1);
xmlFuzzReadEntities();
url = xmlFuzzMainUrl();
if (strcmp(url, "doc.xml") != 0) {
fprintf(stderr, "unexpected main url: %s\n", url);
ret = 1;
}
url = xmlFuzzSecondaryUrl();
if (strcmp(url, "doc.dtd") != 0) {
fprintf(stderr, "unexpected secondary url: %s\n", url);
ret = 1;
}
docBuffer = xmlFuzzMainEntity(&docSize);
ctxt = xmlNewParserCtxt();
xmlCtxtSetResourceLoader(ctxt, xmlFuzzResourceLoader, NULL);