mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-01-05 09:17:38 +03:00
0d5f3710fb
Implement file handling in C to speed up corpus generation.
408 lines
9.9 KiB
C
408 lines
9.9 KiB
C
/*
 * xmlSeed.c: Generate the XML seed corpus for fuzzing.
 *
 * See Copyright for the status of this software.
 */
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <glob.h>
|
|
#include <libgen.h>
|
|
#include <sys/stat.h>
|
|
|
|
#ifdef _WIN32
|
|
#include <direct.h>
|
|
#else
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#include <libxml/parser.h>
|
|
#include <libxml/parserInternals.h>
|
|
#include <libxml/HTMLparser.h>
|
|
#include <libxml/xinclude.h>
|
|
#include <libxml/xmlschemas.h>
|
|
#include "fuzz.h"
|
|
|
|
/* Capacity of every path and glob pattern buffer in this file. */
#define PATH_SIZE 500
/* Capacity of the buffer used to copy input files into a seed. */
#define SEED_BUF_SIZE 16384
/* Capacity of the buffer holding one line of an XPath expression file. */
#define EXPR_SIZE 4500

/*
 * Per-file handler: writes the seed for the input file `base` to `out`.
 * Callers treat a non-zero result as failure.
 */
typedef int (*fileFunc)(const char *base, FILE *out);

/*
 * Per-argument handler: processes one command-line argument.
 */
typedef int (*mainFunc)(const char *arg);
|
|
|
|
static struct {
|
|
FILE *out;
|
|
xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
|
|
xmlExternalEntityLoader oldLoader;
|
|
fileFunc processFile;
|
|
const char *fuzzer;
|
|
int counter;
|
|
char cwd[PATH_SIZE];
|
|
} globalData;
|
|
|
|
/*
|
|
* A custom entity loader that writes all external DTDs or entities to a
|
|
* single file in the format expected by xmlFuzzEntityLoader.
|
|
*/
|
|
static xmlParserInputPtr
|
|
fuzzEntityRecorder(const char *URL, const char *ID,
|
|
xmlParserCtxtPtr ctxt) {
|
|
xmlParserInputPtr in;
|
|
static const int chunkSize = 16384;
|
|
int len;
|
|
|
|
in = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
|
|
if (in == NULL)
|
|
return(NULL);
|
|
|
|
if (globalData.entities == NULL) {
|
|
globalData.entities = xmlHashCreate(4);
|
|
} else if (xmlHashLookup(globalData.entities,
|
|
(const xmlChar *) URL) != NULL) {
|
|
return(in);
|
|
}
|
|
|
|
do {
|
|
len = xmlParserInputBufferGrow(in->buf, chunkSize);
|
|
if (len < 0) {
|
|
fprintf(stderr, "Error reading %s\n", URL);
|
|
xmlFreeInputStream(in);
|
|
return(NULL);
|
|
}
|
|
} while (len > 0);
|
|
|
|
xmlFuzzWriteString(globalData.out, URL);
|
|
xmlFuzzWriteString(globalData.out,
|
|
(char *) xmlBufContent(in->buf->buffer));
|
|
|
|
xmlFreeInputStream(in);
|
|
|
|
xmlHashAddEntry(globalData.entities, (const xmlChar *) URL, NULL);
|
|
|
|
return(xmlNoNetExternalEntityLoader(URL, ID, ctxt));
|
|
}
|
|
|
|
static void
|
|
fuzzRecorderInit(FILE *out) {
|
|
globalData.out = out;
|
|
globalData.entities = xmlHashCreate(8);
|
|
globalData.oldLoader = xmlGetExternalEntityLoader();
|
|
xmlSetExternalEntityLoader(fuzzEntityRecorder);
|
|
}
|
|
|
|
static void
|
|
fuzzRecorderCleanup() {
|
|
xmlSetExternalEntityLoader(globalData.oldLoader);
|
|
xmlHashFree(globalData.entities, xmlHashDefaultDeallocator);
|
|
globalData.out = NULL;
|
|
globalData.entities = NULL;
|
|
globalData.oldLoader = NULL;
|
|
}
|
|
|
|
static int
|
|
processXml(const char *docFile, FILE *out) {
|
|
int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
|
|
xmlDocPtr doc;
|
|
|
|
fwrite(&opts, sizeof(opts), 1, out);
|
|
|
|
fuzzRecorderInit(out);
|
|
|
|
doc = xmlReadFile(docFile, NULL, opts);
|
|
xmlXIncludeProcessFlags(doc, opts);
|
|
xmlFreeDoc(doc);
|
|
|
|
fuzzRecorderCleanup();
|
|
|
|
return(0);
|
|
}
|
|
|
|
static int
|
|
processHtml(const char *docFile, FILE *out) {
|
|
char buf[SEED_BUF_SIZE];
|
|
FILE *file;
|
|
size_t size;
|
|
int opts = 0;
|
|
|
|
fwrite(&opts, sizeof(opts), 1, out);
|
|
|
|
/* Copy file */
|
|
file = fopen(docFile, "rb");
|
|
if (file == NULL) {
|
|
fprintf(stderr, "couldn't open %s\n", docFile);
|
|
return(0);
|
|
}
|
|
do {
|
|
size = fread(buf, 1, SEED_BUF_SIZE, file);
|
|
if (size > 0)
|
|
fwrite(buf, 1, size, out);
|
|
} while (size == SEED_BUF_SIZE);
|
|
fclose(file);
|
|
|
|
return(0);
|
|
}
|
|
|
|
static int
|
|
processSchema(const char *docFile, FILE *out) {
|
|
xmlSchemaPtr schema;
|
|
xmlSchemaParserCtxtPtr pctxt;
|
|
|
|
fuzzRecorderInit(out);
|
|
|
|
pctxt = xmlSchemaNewParserCtxt(docFile);
|
|
xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL);
|
|
schema = xmlSchemaParse(pctxt);
|
|
xmlSchemaFreeParserCtxt(pctxt);
|
|
xmlSchemaFree(schema);
|
|
|
|
fuzzRecorderCleanup();
|
|
|
|
return(0);
|
|
}
|
|
|
|
static int
|
|
processPattern(const char *pattern) {
|
|
glob_t globbuf;
|
|
int ret = 0;
|
|
int res, i;
|
|
|
|
res = glob(pattern, 0, NULL, &globbuf);
|
|
if (res == GLOB_NOMATCH)
|
|
return(0);
|
|
if (res != 0) {
|
|
fprintf(stderr, "couldn't match pattern %s\n", pattern);
|
|
return(-1);
|
|
}
|
|
|
|
for (i = 0; i < globbuf.gl_pathc; i++) {
|
|
struct stat statbuf;
|
|
char outPath[PATH_SIZE];
|
|
char *dirBuf = NULL;
|
|
char *baseBuf = NULL;
|
|
const char *path, *dir, *base;
|
|
FILE *out = NULL;
|
|
int dirChanged = 0;
|
|
size_t size;
|
|
|
|
path = globbuf.gl_pathv[i];
|
|
|
|
if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
|
|
continue;
|
|
|
|
dirBuf = (char *) xmlCharStrdup(path);
|
|
baseBuf = (char *) xmlCharStrdup(path);
|
|
if ((dirBuf == NULL) || (baseBuf == NULL)) {
|
|
fprintf(stderr, "memory allocation failed\n");
|
|
ret = -1;
|
|
goto error;
|
|
}
|
|
dir = dirname(dirBuf);
|
|
base = basename(baseBuf);
|
|
|
|
size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
|
|
globalData.fuzzer, base);
|
|
if (size >= PATH_SIZE) {
|
|
fprintf(stderr, "creating path failed\n");
|
|
ret = -1;
|
|
goto error;
|
|
}
|
|
out = fopen(outPath, "wb");
|
|
if (out == NULL) {
|
|
fprintf(stderr, "couldn't open %s for writing\n", outPath);
|
|
ret = -1;
|
|
goto error;
|
|
}
|
|
if (chdir(dir) != 0) {
|
|
fprintf(stderr, "couldn't chdir to %s\n", dir);
|
|
ret = -1;
|
|
goto error;
|
|
}
|
|
dirChanged = 1;
|
|
if (globalData.processFile(base, out) != 0)
|
|
ret = -1;
|
|
|
|
error:
|
|
if (out != NULL)
|
|
fclose(out);
|
|
xmlFree(dirBuf);
|
|
xmlFree(baseBuf);
|
|
if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
|
|
fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
|
|
ret = -1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
globfree(&globbuf);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
processXPath(const char *testDir, const char *prefix, const char *name,
|
|
const char *data, const char *subdir, int xptr) {
|
|
char pattern[PATH_SIZE];
|
|
glob_t globbuf;
|
|
size_t i, size;
|
|
int ret = 0, res;
|
|
|
|
size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
|
|
testDir, subdir, prefix);
|
|
if (size >= PATH_SIZE)
|
|
return(-1);
|
|
res = glob(pattern, 0, NULL, &globbuf);
|
|
if (res == GLOB_NOMATCH)
|
|
return(0);
|
|
if (res != 0) {
|
|
fprintf(stderr, "couldn't match pattern %s\n", pattern);
|
|
return(-1);
|
|
}
|
|
|
|
for (i = 0; i < globbuf.gl_pathc; i++) {
|
|
char *path = globbuf.gl_pathv[i];
|
|
struct stat statbuf;
|
|
FILE *in;
|
|
char expr[EXPR_SIZE];
|
|
|
|
if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
|
|
continue;
|
|
|
|
in = fopen(path, "rb");
|
|
if (in == NULL) {
|
|
ret = -1;
|
|
continue;
|
|
}
|
|
|
|
while (fgets(expr, EXPR_SIZE, in) > 0) {
|
|
char outPath[PATH_SIZE];
|
|
FILE *out;
|
|
int j;
|
|
|
|
for (j = 0; expr[j] != 0; j++)
|
|
if (expr[j] == '\r' || expr[j] == '\n')
|
|
break;
|
|
expr[j] = 0;
|
|
|
|
size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
|
|
name, globalData.counter);
|
|
if (size >= PATH_SIZE) {
|
|
ret = -1;
|
|
continue;
|
|
}
|
|
out = fopen(outPath, "wb");
|
|
if (out == NULL) {
|
|
ret = -1;
|
|
continue;
|
|
}
|
|
|
|
if (xptr) {
|
|
xmlFuzzWriteString(out, expr);
|
|
} else {
|
|
char xptrExpr[EXPR_SIZE+100];
|
|
|
|
/* Wrap XPath expressions as XPointer */
|
|
snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
|
|
xmlFuzzWriteString(out, xptrExpr);
|
|
}
|
|
|
|
xmlFuzzWriteString(out, data);
|
|
|
|
fclose(out);
|
|
globalData.counter++;
|
|
}
|
|
|
|
fclose(in);
|
|
}
|
|
|
|
globfree(&globbuf);
|
|
|
|
return(ret);
|
|
}
|
|
|
|
int
|
|
processXPathDir(const char *testDir) {
|
|
char pattern[PATH_SIZE];
|
|
glob_t globbuf;
|
|
size_t i, size;
|
|
int ret = 0;
|
|
|
|
globalData.counter = 1;
|
|
if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
|
|
ret = -1;
|
|
|
|
size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
|
|
if (size >= PATH_SIZE)
|
|
return(1);
|
|
if (glob(pattern, 0, NULL, &globbuf) != 0)
|
|
return(1);
|
|
|
|
for (i = 0; i < globbuf.gl_pathc; i++) {
|
|
char *path = globbuf.gl_pathv[i];
|
|
char *data;
|
|
const char *docFile;
|
|
|
|
data = xmlSlurpFile(path, NULL);
|
|
if (data == NULL) {
|
|
ret = -1;
|
|
continue;
|
|
}
|
|
docFile = basename(path);
|
|
|
|
globalData.counter = 1;
|
|
if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
|
|
ret = -1;
|
|
if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
|
|
ret = -1;
|
|
|
|
xmlFree(data);
|
|
}
|
|
|
|
globfree(&globbuf);
|
|
|
|
return(ret);
|
|
}
|
|
|
|
int
|
|
main(int argc, const char **argv) {
|
|
mainFunc processArg = processPattern;
|
|
const char *fuzzer;
|
|
int ret = 0;
|
|
int xpath = 0;
|
|
int i;
|
|
|
|
if (argc < 3) {
|
|
fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
|
|
return(1);
|
|
}
|
|
|
|
xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
|
|
|
|
fuzzer = argv[1];
|
|
if (strcmp(fuzzer, "html") == 0) {
|
|
globalData.processFile = processHtml;
|
|
} else if (strcmp(fuzzer, "schema") == 0) {
|
|
globalData.processFile = processSchema;
|
|
} else if (strcmp(fuzzer, "xml") == 0) {
|
|
globalData.processFile = processXml;
|
|
} else if (strcmp(fuzzer, "xpath") == 0) {
|
|
processArg = processXPathDir;
|
|
} else {
|
|
fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
|
|
return(1);
|
|
}
|
|
globalData.fuzzer = fuzzer;
|
|
|
|
if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
|
|
fprintf(stderr, "couldn't get current directory\n");
|
|
return(1);
|
|
}
|
|
|
|
for (i = 2; i < argc; i++)
|
|
processArg(argv[i]);
|
|
|
|
return(ret);
|
|
}
|
|
|