From b3cb41be8b83b546257cd3757fa6c8b4d34a4cc9 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Mon, 13 May 2024 12:18:08 +0200 Subject: [PATCH] fuzz: Add xmllint fuzzer --- fuzz/.gitignore | 1 + fuzz/Makefile.am | 27 +++++- fuzz/api.c | 2 +- fuzz/fuzz.c | 27 +----- fuzz/fuzz.h | 7 +- fuzz/genSeed.c | 39 +++++++-- fuzz/lint.c | 214 ++++++++++++++++++++++++++++++++++++++++++++++ fuzz/reader.c | 2 +- fuzz/schema.c | 2 +- fuzz/testFuzzer.c | 2 +- fuzz/valid.c | 2 +- fuzz/xinclude.c | 2 +- fuzz/xml.c | 2 +- xmllint.c | 98 ++++++++++++++++++++- 14 files changed, 379 insertions(+), 48 deletions(-) create mode 100644 fuzz/lint.c diff --git a/fuzz/.gitignore b/fuzz/.gitignore index 40d11bf9..bf16acfd 100644 --- a/fuzz/.gitignore +++ b/fuzz/.gitignore @@ -2,6 +2,7 @@ api corpus/ genSeed html +lint reader regexp schema diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am index 0db16ebe..e85452bc 100644 --- a/fuzz/Makefile.am +++ b/fuzz/Makefile.am @@ -1,6 +1,7 @@ AUTOMAKE_OPTIONS = -Wno-syntax EXTRA_PROGRAMS = genSeed \ - api html reader regexp schema uri valid xinclude xml xpath + api html lint reader regexp schema uri valid xinclude \ + xml xpath check_PROGRAMS = testFuzzer EXTRA_DIST = html.dict regexp.dict schema.dict xml.dict xpath.dict \ static_seed/uri static_seed/regexp fuzz.h @@ -29,9 +30,9 @@ testFuzzer_SOURCES = testFuzzer.c fuzz.c .PHONY: corpus clean-corpus -corpus: seed/html.stamp seed/reader.stamp seed/regexp.stamp seed/schema.stamp \ - seed/uri.stamp seed/valid.stamp seed/xinclude.stamp seed/xml.stamp \ - seed/xpath.stamp +corpus: seed/html.stamp seed/lint.stamp seed/reader.stamp seed/regexp.stamp \ + seed/schema.stamp seed/uri.stamp seed/valid.stamp seed/xinclude.stamp \ + seed/xml.stamp seed/xpath.stamp check-local: corpus ./testFuzzer$(EXEEXT) @@ -135,6 +136,24 @@ fuzz-reader: reader$(EXEEXT) seed/reader.stamp $$XML_FUZZ_OPTIONS \ corpus/reader seed/reader +# xmllint fuzzer + +seed/lint.stamp: genSeed$(EXEEXT) + @mkdir -p seed/lint + ./genSeed$(EXEEXT) lint 
$(XML_SEED_CORPUS_SRC) + @touch seed/lint.stamp + +lint_SOURCES = lint.c fuzz.c +lint_LDFLAGS = -fsanitize=fuzzer + +fuzz-lint: lint$(EXEEXT) seed/lint.stamp + @mkdir -p corpus/lint + ./lint$(EXEEXT) \ + -dict=xml.dict \ + -max_len=$(XML_MAX_LEN) \ + $$XML_FUZZ_OPTIONS \ + corpus/lint seed/lint + # API fuzzer api_SOURCES = api.c fuzz.c diff --git a/fuzz/api.c b/fuzz/api.c index 3ac5c1fc..80323ef4 100644 --- a/fuzz/api.c +++ b/fuzz/api.c @@ -964,7 +964,7 @@ LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE); #endif xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); - xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + xmlParserInputBufferCreateFilenameDefault(xmlFuzzEntityLoader); return 0; } diff --git a/fuzz/fuzz.c b/fuzz/fuzz.c index 3d0dbe1e..557343ab 100644 --- a/fuzz/fuzz.c +++ b/fuzz/fuzz.c @@ -391,10 +391,8 @@ xmlFuzzMainEntity(size_t *size) { * * The entity loader for fuzz data. */ -xmlParserInputPtr -xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED, - xmlParserCtxtPtr ctxt) { - xmlParserInputPtr input; +xmlParserInputBufferPtr +xmlFuzzEntityLoader(const char *URL, xmlCharEncoding enc) { xmlFuzzEntityInfo *entity; if (URL == NULL) @@ -403,26 +401,7 @@ xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED, if (entity == NULL) return(NULL); - input = xmlNewInputStream(ctxt); - if (input == NULL) - return(NULL); - input->filename = (char *) xmlCharStrdup(URL); - if (input->filename == NULL) { - xmlCtxtErrMemory(ctxt); - xmlFreeInputStream(input); - return(NULL); - } - input->buf = xmlParserInputBufferCreateMem(entity->data, entity->size, - XML_CHAR_ENCODING_NONE); - if (input->buf == NULL) { - xmlCtxtErrMemory(ctxt); - xmlFreeInputStream(input); - return(NULL); - } - input->base = input->cur = xmlBufContent(input->buf->buffer); - input->end = input->base + xmlBufUse(input->buf->buffer); - - return input; + return(xmlParserInputBufferCreateMem(entity->data, entity->size, enc)); } char * 
diff --git a/fuzz/fuzz.h b/fuzz/fuzz.h index c9236688..592f4413 100644 --- a/fuzz/fuzz.h +++ b/fuzz/fuzz.h @@ -18,6 +18,9 @@ extern "C" { #if defined(LIBXML_HTML_ENABLED) #define HAVE_HTML_FUZZER #endif +#if 1 + #define HAVE_LINT_FUZZER +#endif #if defined(LIBXML_READER_ENABLED) #define HAVE_READER_FUZZER #endif @@ -101,8 +104,8 @@ xmlFuzzMainUrl(void); const char * xmlFuzzMainEntity(size_t *size); -xmlParserInputPtr -xmlFuzzEntityLoader(const char *URL, const char *ID, xmlParserCtxtPtr ctxt); +xmlParserInputBufferPtr +xmlFuzzEntityLoader(const char *URL, xmlCharEncoding enc); char * xmlSlurpFile(const char *path, size_t *size); diff --git a/fuzz/genSeed.c b/fuzz/genSeed.c index 38e2f750..334527eb 100644 --- a/fuzz/genSeed.c +++ b/fuzz/genSeed.c @@ -28,6 +28,7 @@ #define EXPR_SIZE 4500 #define FLAG_READER (1 << 0) +#define FLAG_LINT (1 << 1) typedef int (*fileFunc)(const char *base, FILE *out); @@ -115,14 +116,32 @@ processXml(const char *docFile, FILE *out) { int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD; xmlDocPtr doc; - /* Parser options. */ - xmlFuzzWriteInt(out, opts, 4); - /* Max allocations. */ - xmlFuzzWriteInt(out, 0, 4); + if (globalData.flags & FLAG_LINT) { + /* Switches */ + xmlFuzzWriteInt(out, 0, 4); + xmlFuzzWriteInt(out, 0, 4); + /* maxmem */ + xmlFuzzWriteInt(out, 0, 4); + /* max-ampl */ + xmlFuzzWriteInt(out, 0, 1); + /* pretty */ + xmlFuzzWriteInt(out, 0, 1); + /* encode */ + xmlFuzzWriteString(out, ""); + /* pattern */ + xmlFuzzWriteString(out, ""); + /* xpath */ + xmlFuzzWriteString(out, ""); + } else { + /* Parser options. */ + xmlFuzzWriteInt(out, opts, 4); + /* Max allocations. 
*/ + xmlFuzzWriteInt(out, 0, 4); - if (globalData.flags & FLAG_READER) { - /* Initial reader program with a couple of OP_READs */ - xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01"); + if (globalData.flags & FLAG_READER) { + /* Initial reader program with a couple of OP_READs */ + xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01"); + } } fuzzRecorderInit(out); @@ -423,6 +442,12 @@ main(int argc, const char **argv) { #ifdef HAVE_HTML_FUZZER processArg = processPattern; globalData.processFile = processHtml; +#endif + } else if (strcmp(fuzzer, "lint") == 0) { +#ifdef HAVE_LINT_FUZZER + processArg = processPattern; + globalData.flags |= FLAG_LINT; + globalData.processFile = processXml; #endif } else if (strcmp(fuzzer, "reader") == 0) { #ifdef HAVE_READER_FUZZER diff --git a/fuzz/lint.c b/fuzz/lint.c new file mode 100644 index 00000000..8a92829f --- /dev/null +++ b/fuzz/lint.c @@ -0,0 +1,214 @@ +/* + * lint.c: a libFuzzer target to test the xmllint executable. + * + * See Copyright for the status of this software. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "fuzz.h" + +#define XMLLINT_FUZZ +#include "../xmllint.c" + +static const char *const switches[] = { + "--auto", + "--c14n", + "--c14n11", + "--compress", + "--copy", + "--debug", + "--debugent", + "--dropdtd", + "--dtdattr", + "--exc-c14n", + "--format", + "--htmlout", + "--huge", + "--insert", + "--loaddtd", + "--load-trace", + "--memory", + "--noblanks", + "--nocdata", + "--nocompact", + "--nodefdtd", + "--nodict", + "--noenc", + "--noent", + "--nofixup-base-uris", + "--nonet", + "--noout", + "--nowarning", + "--nowrap", + "--noxincludenode", + "--nsclean", + "--oldxml10", + "--pedantic", + "--postvalid", + "--push", + "--pushsmall", + "--quiet", + "--recover", + "--sax1", + "--testIO", + "--timing", + "--valid", + "--version", + "--walker", + "--xinclude", + "--xmlout" +}; +static const size_t numSwitches = sizeof(switches) / sizeof(switches[0]); + +struct { + const char **argv; + size_t argi; +} vars; + +static void +pushArg(const char *str) { + vars.argv[vars.argi++] = str; +} + +int +LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, + char ***argv ATTRIBUTE_UNUSED) { + int fd; + + /* Redirect stdout to /dev/null */ + fd = open("/dev/null", O_WRONLY); + if (fd == -1) { + perror("/dev/null"); + abort(); + } + if (dup2(fd, STDOUT_FILENO) == -1) { + perror("dup2"); + abort(); + } + close(fd); + + return 0; +} + +int +LLVMFuzzerTestOneInput(const char *data, size_t size) { + char maxmemBuf[20]; + char maxAmplBuf[20]; + char prettyBuf[20]; + const char *sval, *docBuffer, *docUrl; + size_t ssize, docSize, i; + unsigned uval; + int ival; + + vars.argv = malloc((numSwitches + 5 + 6 * 2) * sizeof(vars.argv[0])); + vars.argi = 0; + pushArg("xmllint"), + pushArg("--nocatalogs"); + + xmlFuzzDataInit(data, size); + + for (i = 0; i < numSwitches; i++) { + if (i % 32 == 0) + uval = xmlFuzzReadInt(4); + if ((uval & 1) && (switches[i] != NULL)) + pushArg(switches[i]); + uval 
>>= 1; + } + + /* + * Use four main parsing modes with equal probability + */ + switch (uval & 3) { + case 0: + /* XML parser */ + break; + case 1: + /* HTML parser */ + pushArg("--html"); + break; + case 2: + /* XML reader */ + pushArg("--stream"); + break; + case 3: + /* SAX parser */ + pushArg("--sax"); + break; + } + + uval = xmlFuzzReadInt(4); + if (uval > 0) { + if (size <= (INT_MAX - 2000) / 20) + uval %= size * 20 + 2000; + else + uval %= INT_MAX; + snprintf(maxmemBuf, 20, "%u", uval); + pushArg("--maxmem"); + pushArg(maxmemBuf); + } + + ival = xmlFuzzReadInt(1); + if (ival >= 1 && ival <= 5) { + snprintf(maxAmplBuf, 20, "%d", ival); + pushArg("--max-ampl"); + pushArg(maxAmplBuf); + } + + ival = xmlFuzzReadInt(1); + if (ival != 0) { + snprintf(prettyBuf, 20, "%d", ival - 128); + pushArg("--pretty"); + pushArg(prettyBuf); + } + + sval = xmlFuzzReadString(&ssize); + if (ssize > 0) { + pushArg("--encode"); + pushArg(sval); + } + + sval = xmlFuzzReadString(&ssize); + if (ssize > 0) { + pushArg("--pattern"); + pushArg(sval); + } + + sval = xmlFuzzReadString(&ssize); + if (ssize > 0) { + pushArg("--xpath"); + pushArg(sval); + } + + xmlFuzzReadEntities(); + docBuffer = xmlFuzzMainEntity(&docSize); + docUrl = xmlFuzzMainUrl(); + if (docBuffer == NULL || docUrl[0] == '-') + goto exit; + pushArg(docUrl); + + pushArg(NULL); + + xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); + xmlParserInputBufferCreateFilenameDefault(xmlFuzzEntityLoader); +#ifdef LIBXML_CATALOG_ENABLED + xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE); +#endif + + xmllintMain(vars.argi - 1, vars.argv); + + xmlMemSetup(free, malloc, realloc, xmlMemStrdup); + +exit: + xmlFuzzDataCleanup(); + free(vars.argv); + return(0); +} diff --git a/fuzz/reader.c b/fuzz/reader.c index 5721d356..7935d238 100644 --- a/fuzz/reader.c +++ b/fuzz/reader.c @@ -102,7 +102,7 @@ LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE); #endif xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); - 
xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + xmlParserInputBufferCreateFilenameDefault(xmlFuzzEntityLoader); return 0; } diff --git a/fuzz/schema.c b/fuzz/schema.c index 611deeca..a9111fca 100644 --- a/fuzz/schema.c +++ b/fuzz/schema.c @@ -18,7 +18,7 @@ LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE); #endif xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); - xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + xmlParserInputBufferCreateFilenameDefault(xmlFuzzEntityLoader); return 0; } diff --git a/fuzz/testFuzzer.c b/fuzz/testFuzzer.c index 0b6aa1da..a0543e5b 100644 --- a/fuzz/testFuzzer.c +++ b/fuzz/testFuzzer.c @@ -162,7 +162,7 @@ testEntityLoader(void) { xmlDocPtr doc; int ret = 0; - xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + xmlParserInputBufferCreateFilenameDefault(xmlFuzzEntityLoader); xmlFuzzDataInit(data, sizeof(data) - 1); xmlFuzzReadEntities(); diff --git a/fuzz/valid.c b/fuzz/valid.c index 67b2fb24..df896df0 100644 --- a/fuzz/valid.c +++ b/fuzz/valid.c @@ -20,7 +20,7 @@ LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE); #endif xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); - xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + xmlParserInputBufferCreateFilenameDefault(xmlFuzzEntityLoader); return 0; } diff --git a/fuzz/xinclude.c b/fuzz/xinclude.c index e38a69e0..53bef1cc 100644 --- a/fuzz/xinclude.c +++ b/fuzz/xinclude.c @@ -21,7 +21,7 @@ LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE); #endif xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); - xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + xmlParserInputBufferCreateFilenameDefault(xmlFuzzEntityLoader); return 0; } diff --git a/fuzz/xml.c b/fuzz/xml.c index 31ab2102..ef017c86 100644 --- a/fuzz/xml.c +++ b/fuzz/xml.c @@ -21,7 +21,7 @@ LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE); #endif xmlSetGenericErrorFunc(NULL, 
xmlFuzzErrorFunc); - xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + xmlParserInputBufferCreateFilenameDefault(xmlFuzzEntityLoader); return 0; } diff --git a/xmllint.c b/xmllint.c index 51ade4bf..4bdf5439 100644 --- a/xmllint.c +++ b/xmllint.c @@ -81,7 +81,11 @@ #include #endif -#define ERR_STREAM stderr +#ifdef XMLLINT_FUZZ + #define ERR_STREAM stdout +#else + #define ERR_STREAM stderr +#endif #ifndef XML_XML_DEFAULT_CATALOG #define XML_XML_DEFAULT_CATALOG "file://" SYSCONFDIR "/xml/catalog" @@ -302,6 +306,7 @@ xmllintExternalEntityLoader(const char *URL, const char *ID, } return(NULL); } + /************************************************************************ * * * Memory allocation consumption debugging * @@ -356,7 +361,7 @@ myStrdupFunc(const char *str) if (ret != NULL) { if (xmlMemUsed() > maxmem) { OOM(); - xmlFree(ret); + xmlMemFree(ret); return (NULL); } } @@ -3097,8 +3102,8 @@ skipArgs(const char *arg) { return(0); } -int -main(int argc, char **argv) { +static int +xmllintMain(int argc, const char **argv) { int i, acount; int files = 0; int version = 0; @@ -3112,6 +3117,82 @@ main(int argc, char **argv) { int nocatalogs = 0; #endif +#ifdef XMLLINT_FUZZ +#ifdef LIBXML_DEBUG_ENABLED + shell = 0; + debugent = 0; +#endif + debug = 0; + maxmem = 0; +#ifdef LIBXML_TREE_ENABLED + copy = 0; +#endif /* LIBXML_TREE_ENABLED */ + noout = 0; +#ifdef LIBXML_OUTPUT_ENABLED + format = 0; + output = NULL; + compress = 0; +#endif /* LIBXML_OUTPUT_ENABLED */ +#ifdef LIBXML_VALID_ENABLED + postvalid = 0; + dtdvalid = NULL; + dtdvalidfpi = NULL; + insert = 0; +#endif +#ifdef LIBXML_SCHEMAS_ENABLED + relaxng = NULL; + relaxngschemas = NULL; + schema = NULL; + wxschemas = NULL; +#endif +#ifdef LIBXML_SCHEMATRON_ENABLED + schematron = NULL; + wxschematron = NULL; +#endif + repeat = 0; +#if defined(LIBXML_HTML_ENABLED) + html = 0; + xmlout = 0; +#endif + htmlout = 0; +#ifdef LIBXML_PUSH_ENABLED + push = 0; + pushsize = 4096; +#endif /* LIBXML_PUSH_ENABLED */ +#ifdef 
HAVE_MMAP + memory = 0; +#endif + testIO = 0; + encoding = NULL; +#ifdef LIBXML_XINCLUDE_ENABLED + xinclude = 0; +#endif + progresult = XMLLINT_RETURN_OK; + quiet = 0; + timing = 0; + generate = 0; + dropdtd = 0; +#ifdef LIBXML_C14N_ENABLED + canonical = 0; + canonical_11 = 0; + exc_canonical = 0; +#endif +#ifdef LIBXML_READER_ENABLED + walker = 0; +#ifdef LIBXML_PATTERN_ENABLED + pattern = NULL; + patternc = NULL; + patstream = NULL; +#endif +#endif /* LIBXML_READER_ENABLED */ +#ifdef LIBXML_XPATH_ENABLED + xpathquery = NULL; +#endif + options = XML_PARSE_COMPACT | XML_PARSE_BIG_LINES; + maxAmpl = 0; + defaultEntityLoader = NULL; +#endif /* XMLLINT_FUZZ */ + if (argc <= 1) { usage(ERR_STREAM, argv[0]); return(XMLLINT_ERR_UNCLASS); @@ -3682,8 +3763,17 @@ main(int argc, char **argv) { goto error; error: + if (defaultEntityLoader != NULL) + xmlSetExternalEntityLoader(defaultEntityLoader); xmlCleanupParser(); return(progresult); } +#ifndef XMLLINT_FUZZ +int +main(int argc, char **argv) { + return(xmllintMain(argc, (const char **) argv)); +} +#endif +