1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-01-12 09:17:37 +03:00
libxml2/fuzz/genSeed.c

515 lines
13 KiB
C
Raw Normal View History

/*
* genSeed.c: Generate the XML seed corpus for fuzzing.
*
* See Copyright for the status of this software.
*/
#include <stdio.h>
#include <string.h>
#include <glob.h>
#include <libgen.h>
#include <sys/stat.h>
#ifdef _WIN32
#include <direct.h>
#else
#include <unistd.h>
#endif
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/HTMLparser.h>
#include <libxml/xinclude.h>
#include <libxml/xmlschemas.h>
#include "fuzz.h"
/* Size of the buffers that hold file system paths and glob patterns. */
#define PATH_SIZE 500
/* Chunk size used when copying an input document into a seed file. */
#define SEED_BUF_SIZE 16384
/* Size of the line buffer that holds one XPath expression. */
#define EXPR_SIZE 4500

/* Bits stored in globalData.flags; main() sets them from the fuzzer name. */
#define FLAG_READER (1 << 0)
#define FLAG_LINT (1 << 1)
2024-04-16 14:24:12 +03:00
/*
 * Callback that writes the seed for one input file to `out`.
 * processPattern() invokes it with the basename of the file after changing
 * into the file's directory and treats a non-zero return as failure.
 */
typedef int
(*fileFunc)(const char *base, FILE *out);
/*
 * Handler for one command line argument following the fuzzer name.
 * main() calls it once per remaining argument.
 */
typedef int
(*mainFunc)(const char *arg);
/* Program-wide state shared by main(), the processors and the recorder. */
static struct {
    /* Seed file that fuzzResourceRecorder appends recorded resources to. */
    FILE *out;
    /*
     * Set of URLs that were already recorded. Only the presence of a key
     * matters: the stored value is just a non-NULL marker.
     */
    xmlHashTablePtr entities;
    /* External entity loader that was active when recording started. */
    xmlExternalEntityLoader oldLoader;
    /* Per-file seed writer selected in main() from the fuzzer name. */
    fileFunc processFile;
    /* Fuzzer name from the command line; names the seed subdirectory. */
    const char *fuzzer;
    /* Running number appended to the names of generated XPath seeds. */
    int counter;
    /* Directory the program was started in; processPattern returns here. */
    char cwd[PATH_SIZE];
    /* Combination of FLAG_* bits. */
    int flags;
} globalData;
2022-09-02 18:47:48 +03:00
#if defined(HAVE_SCHEMA_FUZZER) || \
defined(HAVE_XML_FUZZER)
/*
 * A custom resource loader that writes all external DTDs or entities to a
 * single file in the format expected by xmlFuzzResourceLoader.
 *
 * Every URL is written at most once. The first time a URL is seen, the
 * resource is read completely, its URL and content are appended to
 * globalData.out, and a fresh input for the same URL is handed back to the
 * parser. URLs that were recorded before are simply opened and returned.
 *
 * Returns XML_ERR_OK and stores the input in *out on success, otherwise an
 * error code with *out set to NULL.
 */
static int
fuzzResourceRecorder(void *data ATTRIBUTE_UNUSED, const char *URL,
                     const char *ID ATTRIBUTE_UNUSED,
                     xmlResourceType type ATTRIBUTE_UNUSED, int flags,
                     xmlParserInputPtr *out) {
    xmlParserInputPtr in;
    static const int chunkSize = 16384;
    int code, len;

    *out = NULL;

    code = xmlInputCreateUrl(URL, flags, &in);
    if (code != XML_ERR_OK)
        return(code);

    if (globalData.entities == NULL) {
        globalData.entities = xmlHashCreate(4);
    } else if (xmlHashLookup(globalData.entities,
                             (const xmlChar *) URL) != NULL) {
        /* Already recorded, pass the input straight through. */
        *out = in;
        return(XML_ERR_OK);
    }

    /* Pull the whole resource into the input buffer. */
    do {
        len = xmlParserInputBufferGrow(in->buf, chunkSize);
        if (len < 0) {
            /*
             * Fetch the error code first: freeing the input also releases
             * the buffer the code is stored in.
             */
            code = in->buf->error;
            fprintf(stderr, "Error reading %s\n", URL);
            xmlFreeInputStream(in);
            return(code);
        }
    } while (len > 0);

    xmlFuzzWriteString(globalData.out, URL);
    xmlFuzzWriteString(globalData.out,
                       (char *) xmlBufContent(in->buf->buffer));

    xmlFreeInputStream(in);

    /* The table pointer only serves as a non-NULL "seen" marker. */
    xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
                    globalData.entities);

    /* The first input was consumed above, so open the resource again. */
    return(xmlInputCreateUrl(URL, flags, out));
}
/*
 * Set up globalData for recording the resources of one document: remember
 * the current external entity loader, start with an empty table of seen
 * URLs and direct the recorded data to `out`.
 */
static void
fuzzRecorderInit(FILE *out) {
    globalData.oldLoader = xmlGetExternalEntityLoader();
    globalData.entities = xmlHashCreate(8);
    globalData.out = out;
}
static void
2022-09-02 18:47:48 +03:00
fuzzRecorderCleanup(void) {
xmlHashFree(globalData.entities, NULL);
globalData.out = NULL;
globalData.entities = NULL;
globalData.oldLoader = NULL;
}
2022-09-02 18:47:48 +03:00
#endif
#ifdef HAVE_XML_FUZZER
static int
processXml(const char *docFile, FILE *out) {
int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
xmlParserCtxtPtr ctxt;
xmlDocPtr doc;
2024-05-13 13:18:08 +03:00
if (globalData.flags & FLAG_LINT) {
/* Switches */
xmlFuzzWriteInt(out, 0, 4);
xmlFuzzWriteInt(out, 0, 4);
/* maxmem */
xmlFuzzWriteInt(out, 0, 4);
/* max-ampl */
xmlFuzzWriteInt(out, 0, 1);
/* pretty */
xmlFuzzWriteInt(out, 0, 1);
/* encode */
xmlFuzzWriteString(out, "");
/* pattern */
xmlFuzzWriteString(out, "");
/* xpath */
xmlFuzzWriteString(out, "");
} else {
/* Parser options. */
xmlFuzzWriteInt(out, opts, 4);
/* Max allocations. */
xmlFuzzWriteInt(out, 0, 4);
if (globalData.flags & FLAG_READER) {
/* Initial reader program with a couple of OP_READs */
xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01");
}
2024-04-16 14:24:12 +03:00
}
fuzzRecorderInit(out);
ctxt = xmlNewParserCtxt();
xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
xmlCtxtSetResourceLoader(ctxt, fuzzResourceRecorder, NULL);
doc = xmlCtxtReadFile(ctxt, docFile, NULL, opts);
#ifdef LIBXML_XINCLUDE_ENABLED
{
xmlXIncludeCtxtPtr xinc = xmlXIncludeNewContext(doc);
xmlXIncludeSetErrorHandler(xinc, xmlFuzzSErrorFunc, NULL);
xmlXIncludeSetResourceLoader(xinc, fuzzResourceRecorder, NULL);
xmlXIncludeSetFlags(xinc, opts);
xmlXIncludeProcessNode(xinc, (xmlNodePtr) doc);
xmlXIncludeFreeContext(xinc);
}
#endif
xmlFreeDoc(doc);
xmlFreeParserCtxt(ctxt);
fuzzRecorderCleanup();
return(0);
}
#endif
#ifdef HAVE_HTML_FUZZER
/*
 * Write the seed for one HTML document: a header of two zero 4-byte
 * integers (parser options, max allocations) followed by a verbatim copy
 * of the file.
 *
 * docFile: path of the document to copy
 * out:     seed file to write to
 *
 * Returns 0 on success and -1 if the document can't be opened or read, or
 * if writing to `out` fails.
 */
static int
processHtml(const char *docFile, FILE *out) {
    char buf[SEED_BUF_SIZE];
    FILE *file;
    size_t size;
    int ret = 0;

    /* Parser options. */
    xmlFuzzWriteInt(out, 0, 4);
    /* Max allocations. */
    xmlFuzzWriteInt(out, 0, 4);

    /* Copy file */
    file = fopen(docFile, "rb");
    if (file == NULL) {
        fprintf(stderr, "couldn't open %s\n", docFile);
        return(-1);
    }
    do {
        size = fread(buf, 1, SEED_BUF_SIZE, file);
        if ((size > 0) && (fwrite(buf, 1, size, out) != size)) {
            fprintf(stderr, "couldn't write seed for %s\n", docFile);
            ret = -1;
            break;
        }
    } while (size == SEED_BUF_SIZE);

    /* A short read ends the loop on EOF and on error alike. */
    if ((ret == 0) && (ferror(file))) {
        fprintf(stderr, "error reading %s\n", docFile);
        ret = -1;
    }
    fclose(file);

    return(ret);
}
#endif
#ifdef HAVE_SCHEMA_FUZZER
/*
 * Write the seed for one XML Schema document: a zero 4-byte "max
 * allocations" field, followed by every resource the schema parser loads
 * while compiling docFile, as recorded by fuzzResourceRecorder.
 *
 * Always returns 0.
 */
static int
processSchema(const char *docFile, FILE *out) {
    xmlSchemaParserCtxtPtr parser;
    xmlSchemaPtr compiled;

    /* Max allocations. */
    xmlFuzzWriteInt(out, 0, 4);

    fuzzRecorderInit(out);

    /* Compile the schema only to trigger the resource loads. */
    parser = xmlSchemaNewParserCtxt(docFile);
    xmlSchemaSetParserStructuredErrors(parser, xmlFuzzSErrorFunc, NULL);
    xmlSchemaSetResourceLoader(parser, fuzzResourceRecorder, NULL);
    compiled = xmlSchemaParse(parser);
    xmlSchemaFreeParserCtxt(parser);
    xmlSchemaFree(compiled);

    fuzzRecorderCleanup();

    return(0);
}
#endif
2022-09-02 18:47:48 +03:00
#if defined(HAVE_HTML_FUZZER) || \
defined(HAVE_SCHEMA_FUZZER) || \
defined(HAVE_XML_FUZZER)
/*
 * Generate one seed file for every regular file matching a glob pattern.
 *
 * For each match, the output file "seed/<fuzzer>/<basename>" is opened
 * relative to the start directory, the working directory is switched to
 * the directory of the input file and globalData.processFile is called
 * with the basename. Afterwards the working directory is restored.
 *
 * Returns 0 on success (including a pattern without matches) and -1 if
 * the pattern couldn't be expanded or any file failed. Processing stops
 * early only if the start directory can't be restored.
 */
static int
processPattern(const char *pattern) {
    glob_t globbuf;
    int ret = 0;
    int res;
    size_t i;

    memset(&globbuf, 0, sizeof(globbuf));
    res = glob(pattern, 0, NULL, &globbuf);
    if (res != 0) {
        if (res != GLOB_NOMATCH) {
            fprintf(stderr, "couldn't match pattern %s\n", pattern);
            ret = -1;
        }
        /* glob() may leave partial results behind when it fails. */
        globfree(&globbuf);
        return(ret);
    }

    for (i = 0; i < globbuf.gl_pathc; i++) {
        struct stat statbuf;
        char outPath[PATH_SIZE];
        char *dirBuf = NULL;
        char *baseBuf = NULL;
        const char *path, *dir, *base;
        FILE *out = NULL;
        int dirChanged = 0;
        size_t size;

        path = globbuf.gl_pathv[i];

        /* Silently skip directories and other non-regular files. */
        if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
            continue;

        /* dirname() and basename() get private copies of the path. */
        dirBuf = (char *) xmlCharStrdup(path);
        baseBuf = (char *) xmlCharStrdup(path);
        if ((dirBuf == NULL) || (baseBuf == NULL)) {
            fprintf(stderr, "memory allocation failed\n");
            ret = -1;
            goto error;
        }
        dir = dirname(dirBuf);
        base = basename(baseBuf);

        size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
                        globalData.fuzzer, base);
        if (size >= PATH_SIZE) {
            fprintf(stderr, "creating path failed\n");
            ret = -1;
            goto error;
        }

        /* Open the output before chdir: outPath is a relative path. */
        out = fopen(outPath, "wb");
        if (out == NULL) {
            fprintf(stderr, "couldn't open %s for writing\n", outPath);
            ret = -1;
            goto error;
        }
        if (chdir(dir) != 0) {
            fprintf(stderr, "couldn't chdir to %s\n", dir);
            ret = -1;
            goto error;
        }
        dirChanged = 1;
        if (globalData.processFile(base, out) != 0)
            ret = -1;

    error:
        if (out != NULL)
            fclose(out);
        xmlFree(dirBuf);
        xmlFree(baseBuf);
        /* Later relative paths are meaningless if we can't get back. */
        if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
            fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
            ret = -1;
            break;
        }
    }

    globfree(&globbuf);
    return(ret);
}
2022-09-02 18:47:48 +03:00
#endif
#ifdef HAVE_XPATH_FUZZER
static int
processXPath(const char *testDir, const char *prefix, const char *name,
const char *data, const char *subdir, int xptr) {
char pattern[PATH_SIZE];
glob_t globbuf;
size_t i, size;
int ret = 0, res;
size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
testDir, subdir, prefix);
if (size >= PATH_SIZE)
return(-1);
res = glob(pattern, 0, NULL, &globbuf);
if (res == GLOB_NOMATCH)
return(0);
if (res != 0) {
fprintf(stderr, "couldn't match pattern %s\n", pattern);
return(-1);
}
for (i = 0; i < globbuf.gl_pathc; i++) {
char *path = globbuf.gl_pathv[i];
struct stat statbuf;
FILE *in;
char expr[EXPR_SIZE];
if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
continue;
in = fopen(path, "rb");
if (in == NULL) {
ret = -1;
continue;
}
2022-09-02 18:47:48 +03:00
while (fgets(expr, EXPR_SIZE, in) != NULL) {
char outPath[PATH_SIZE];
FILE *out;
int j;
for (j = 0; expr[j] != 0; j++)
if (expr[j] == '\r' || expr[j] == '\n')
break;
expr[j] = 0;
size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
name, globalData.counter);
if (size >= PATH_SIZE) {
ret = -1;
continue;
}
out = fopen(outPath, "wb");
if (out == NULL) {
ret = -1;
continue;
}
/* Max allocations. */
xmlFuzzWriteInt(out, 0, 4);
if (xptr) {
xmlFuzzWriteString(out, expr);
} else {
char xptrExpr[EXPR_SIZE+100];
/* Wrap XPath expressions as XPointer */
snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
xmlFuzzWriteString(out, xptrExpr);
}
xmlFuzzWriteString(out, data);
fclose(out);
globalData.counter++;
}
fclose(in);
}
globfree(&globbuf);
return(ret);
}
2022-09-02 18:47:48 +03:00
/*
 * Generate all XPath seeds from an XPath test directory.
 *
 * The files in "<testDir>/expr" are combined with the fixed document
 * "<d></d>". Then, for every file in "<testDir>/docs", the content is
 * loaded and combined with the expression files carrying the same name
 * prefix in the "tests", "xptr" and "xptr-xp1" subdirectories. Seed
 * numbering restarts at 1 for every document.
 *
 * Returns 0 on success, 1 if the docs directory couldn't be listed and -1
 * if any other step failed.
 */
static int
processXPathDir(const char *testDir) {
    char pattern[PATH_SIZE];
    glob_t globbuf;
    size_t i, size;
    int ret = 0;

    globalData.counter = 1;
    if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
        ret = -1;

    size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
    if (size >= PATH_SIZE)
        return(1);

    memset(&globbuf, 0, sizeof(globbuf));
    if (glob(pattern, 0, NULL, &globbuf) != 0) {
        /* glob() may leave partial results behind when it fails. */
        globfree(&globbuf);
        return(1);
    }

    for (i = 0; i < globbuf.gl_pathc; i++) {
        char *path = globbuf.gl_pathv[i];
        char *data;
        const char *docFile;

        data = xmlSlurpFile(path, NULL);
        if (data == NULL) {
            ret = -1;
            continue;
        }

        /* The document's file name doubles as prefix and seed name. */
        docFile = basename(path);
        globalData.counter = 1;
        if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
            ret = -1;
        if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
            ret = -1;
        if (processXPath(testDir, docFile, docFile, data, "xptr-xp1", 1) != 0)
            ret = -1;

        xmlFree(data);
    }

    globfree(&globbuf);
    return(ret);
}
#endif
/*
 * Entry point: "seed [FUZZER] [PATTERN...]".
 *
 * argv[1] selects the fuzzer whose seed format is generated. Every
 * further argument is passed to the handler for that fuzzer. A fuzzer
 * that is known but was disabled at build time is accepted and results
 * in no work.
 *
 * Returns 0 on success and 1 on a usage error, an unknown fuzzer, failure
 * to determine the current directory, or if any argument failed to be
 * processed.
 */
int
main(int argc, const char **argv) {
    mainFunc processArg = NULL;
    const char *fuzzer;
    int ret = 0;
    int i;

    if (argc < 3) {
        fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
        return(1);
    }

    fuzzer = argv[1];
    if (strcmp(fuzzer, "html") == 0) {
#ifdef HAVE_HTML_FUZZER
        processArg = processPattern;
        globalData.processFile = processHtml;
#endif
    } else if (strcmp(fuzzer, "lint") == 0) {
#ifdef HAVE_LINT_FUZZER
        processArg = processPattern;
        globalData.flags |= FLAG_LINT;
        globalData.processFile = processXml;
#endif
    } else if (strcmp(fuzzer, "reader") == 0) {
#ifdef HAVE_READER_FUZZER
        processArg = processPattern;
        globalData.flags |= FLAG_READER;
        globalData.processFile = processXml;
#endif
    } else if (strcmp(fuzzer, "schema") == 0) {
#ifdef HAVE_SCHEMA_FUZZER
        processArg = processPattern;
        globalData.processFile = processSchema;
#endif
    } else if (strcmp(fuzzer, "valid") == 0) {
#ifdef HAVE_VALID_FUZZER
        processArg = processPattern;
        globalData.processFile = processXml;
#endif
    } else if (strcmp(fuzzer, "xinclude") == 0) {
#ifdef HAVE_XINCLUDE_FUZZER
        processArg = processPattern;
        globalData.processFile = processXml;
#endif
    } else if (strcmp(fuzzer, "xml") == 0) {
#ifdef HAVE_XML_FUZZER
        processArg = processPattern;
        globalData.processFile = processXml;
#endif
    } else if (strcmp(fuzzer, "xpath") == 0) {
#ifdef HAVE_XPATH_FUZZER
        processArg = processXPathDir;
#endif
    } else {
        fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
        return(1);
    }
    globalData.fuzzer = fuzzer;

    /* processPattern changes directory and needs a way back. */
    if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
        fprintf(stderr, "couldn't get current directory\n");
        return(1);
    }

    if (processArg != NULL) {
        for (i = 2; i < argc; i++) {
            /* Keep going after a failure but report it in the status. */
            if (processArg(argv[i]) != 0)
                ret = 1;
        }
    }

    return(ret);
}