1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-12-24 21:33:51 +03:00

Add a couple of libFuzzer targets

- XML fuzzer
  Currently tests the pull parser, push parser and reader, as well as
  serialization. Supports splitting fuzz data into multiple documents
  for things like external DTDs or entities. The seed corpus is built
  from parts of the test suite.

- Regexp fuzzer
  Seed corpus was statically generated from test suite.

- URI fuzzer
  Tests parsing and most other functions from uri.c.
This commit is contained in:
Nick Wellnhofer 2020-06-05 12:49:25 +02:00
parent 2e8cc66d8f
commit 00ed736eec
137 changed files with 850 additions and 2 deletions

View File

@ -2,7 +2,7 @@
ACLOCAL_AMFLAGS = -I m4
SUBDIRS = include . doc example xstc $(PYTHON_SUBDIR)
SUBDIRS = include . doc example fuzz xstc $(PYTHON_SUBDIR)
DIST_SUBDIRS = include . doc example python xstc
@ -210,6 +210,7 @@ runtests: runtest$(EXEEXT) testrecurse$(EXEEXT) testapi$(EXEEXT) \
$(CHECKER) ./runxmlconf$(EXEEXT)
@(if [ "$(PYTHON_SUBDIR)" != "" ] ; then cd python ; \
$(MAKE) tests ; fi)
@cd fuzz; $(MAKE) tests
check: all runtests

View File

@ -1714,7 +1714,7 @@ rm -f COPYING.LIB COPYING
ln -s $srcdir/Copyright COPYING
# keep on one line for cygwin c.f. #130896
AC_CONFIG_FILES([libxml2.spec:libxml.spec.in Makefile include/Makefile include/libxml/Makefile doc/Makefile doc/examples/Makefile doc/devhelp/Makefile example/Makefile python/Makefile python/tests/Makefile xstc/Makefile include/libxml/xmlversion.h libxml-2.0.pc libxml-2.0-uninstalled.pc libxml2-config.cmake])
AC_CONFIG_FILES([libxml2.spec:libxml.spec.in Makefile include/Makefile include/libxml/Makefile doc/Makefile doc/examples/Makefile doc/devhelp/Makefile example/Makefile fuzz/Makefile python/Makefile python/tests/Makefile xstc/Makefile include/libxml/xmlversion.h libxml-2.0.pc libxml-2.0-uninstalled.pc libxml2-config.cmake])
AC_CONFIG_FILES([python/setup.py], [chmod +x python/setup.py])
AC_CONFIG_FILES([xml2-config], [chmod +x xml2-config])
AC_OUTPUT

7
fuzz/.gitignore vendored Normal file
View File

@ -0,0 +1,7 @@
corpus/
regexp
seed/xml*
testFuzzer
uri
xml
xmlSeed

75
fuzz/Makefile.am Normal file
View File

@ -0,0 +1,75 @@
# Programs: the fuzz targets and the seed generator are listed in
# EXTRA_PROGRAMS and are pulled in as prerequisites of the fuzz-* and
# seed rules below; testFuzzer is listed in check_PROGRAMS.
EXTRA_PROGRAMS = regexp uri xml xmlSeed
check_PROGRAMS = testFuzzer
CLEANFILES = $(EXTRA_PROGRAMS)

# Compile against the in-tree headers and link against the in-tree library.
AM_CPPFLAGS = -I$(top_srcdir)/include
DEPENDENCIES = $(top_builddir)/libxml2.la
LDADD = \
	$(STATIC_BINARIES) \
	$(top_builddir)/libxml2.la \
	$(THREAD_LIBS) \
	$(Z_LIBS) \
	$(LZMA_LIBS) \
	$(ICONV_LIBS) \
	$(M_LIBS) \
	$(WIN32_EXTRA_LIBADD)

# Upper bound in bytes, used both as -max_len for the XML fuzzer and as the
# size limit for generated seed files.
PARSER_FUZZER_MAX_LEN = 100000

# Shell globs (expanded by the shell in the seed recipe, not by make) naming
# the test-suite files that are turned into the XML seed corpus.
XML_SEED_CORPUS_SRC = \
	$(top_srcdir)/test/* \
	$(top_srcdir)/test/errors/*.xml \
	$(top_srcdir)/test/errors10/*.xml \
	$(top_srcdir)/test/namespaces/* \
	$(top_srcdir)/test/valid/*.xml \
	$(top_srcdir)/test/xmlid/* \
	$(top_srcdir)/test/VC/* \
	$(top_srcdir)/test/VCM/*
xmlSeed_SOURCES = xmlSeed.c fuzz.c

# Build the XML seed corpus under seed/xml.
#
# Every regular file matched by XML_SEED_CORPUS_SRC is passed to xmlSeed,
# which is run from the file's own directory and given only the base name
# (presumably so that relative references inside the test file resolve;
# confirm against xmlSeed.c). Its output becomes the seed file. Seeds
# larger than PARSER_FUZZER_MAX_LEN bytes are removed again. The stamp file
# records that the corpus has been generated.
#
# The directory change is done with a subshell and "cd" rather than
# pushd/popd, which are bash extensions and unavailable when make runs
# recipes with a plain POSIX /bin/sh.
seed/xml.stamp: xmlSeed$(EXEEXT)
	@mkdir -p seed/xml
	@for i in $(XML_SEED_CORPUS_SRC); do \
	    if [ -f "$$i" ]; then \
	        echo Processing seed $$i; \
	        base=$$(basename "$$i"); \
	        outfile="$(abs_builddir)/seed/xml/$$base"; \
	        ( cd "$$(dirname "$$i")" && \
	          "$(abs_builddir)/xmlSeed$(EXEEXT)" "$$base" > "$$outfile" ); \
	        if [ "$$(wc -c < "$$outfile")" -gt $(PARSER_FUZZER_MAX_LEN) ]; then \
	            rm "$$outfile"; \
	        fi; \
	    fi; \
	done
	@touch $@
testFuzzer_SOURCES = testFuzzer.c fuzz.c

# Run the fuzzer self-test binary. "tests" is a command, not a file, so it
# is declared phony: a stray file named "tests" must not suppress the run.
.PHONY: tests
tests: testFuzzer$(EXEEXT)
	@echo "## Running fuzzer tests"
	@./testFuzzer$(EXEEXT)
xml_SOURCES = xml.c fuzz.c
xml_LDFLAGS = -fsanitize=fuzzer

# Run the XML fuzzer with corpus/xml as the first (working) corpus directory
# and the generated seed/xml directory as additional input.
#
# The dictionary is referenced through $(srcdir) so that out-of-tree (VPATH)
# builds find it; no rule in this file generates xml.dict, so it is expected
# to live in the source directory.
.PHONY: fuzz-xml
fuzz-xml: xml$(EXEEXT) seed/xml.stamp
	@mkdir -p corpus/xml
	./xml$(EXEEXT) \
	    -dict=$(srcdir)/xml.dict \
	    -max_len=$(PARSER_FUZZER_MAX_LEN) \
	    -timeout=20 \
	    corpus/xml seed/xml
regexp_SOURCES = regexp.c fuzz.c
regexp_LDFLAGS = -fsanitize=fuzzer

# Run the regexp fuzzer with corpus/regexp as the first (working) corpus
# directory and the checked-in seed corpus as additional input.
#
# Both the dictionary and the seed directory are source files, so both are
# referenced through $(srcdir) to keep out-of-tree (VPATH) builds working.
.PHONY: fuzz-regexp
fuzz-regexp: regexp$(EXEEXT)
	@mkdir -p corpus/regexp
	./regexp$(EXEEXT) \
	    -dict=$(srcdir)/regexp.dict \
	    -max_len=10000 \
	    -timeout=20 \
	    corpus/regexp $(srcdir)/seed/regexp
uri_SOURCES = uri.c fuzz.c
uri_LDFLAGS = -fsanitize=fuzzer

# Run the URI fuzzer with corpus/uri as the first (working) corpus directory
# and the checked-in seed corpus from the source tree as additional input.
# Declared phony because the target names a command, not a file.
.PHONY: fuzz-uri
fuzz-uri: uri$(EXEEXT)
	@mkdir -p corpus/uri
	./uri$(EXEEXT) \
	    -max_len=10000 \
	    -timeout=2 \
	    corpus/uri $(srcdir)/seed/uri

19
fuzz/README Normal file
View File

@ -0,0 +1,19 @@
libFuzzer instructions for libxml2
==================================
Set compiler and options:
export CC=clang
export CFLAGS="-g -fsanitize=fuzzer-no-link,address,undefined \
-fno-sanitize-recover=all \
-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION"
Build libxml2 with instrumentation:
./configure --without-python
make
Run fuzzers:
make -C fuzz fuzz-xml
make -C fuzz fuzz-regexp
make -C fuzz fuzz-uri

274
fuzz/fuzz.c Normal file
View File

@ -0,0 +1,274 @@
/*
* fuzz.c: Common functions for fuzzing.
*
* See Copyright for the status of this software.
*/
#include <stdlib.h>
#include <string.h>
#include <libxml/hash.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/tree.h>
#include <libxml/xmlIO.h>
#include "fuzz.h"
/*
 * Describes one entity read from the fuzz data: a pointer to its
 * unescaped content and the content length in bytes (terminator not
 * counted). The content is not owned by this struct; it is the string
 * returned by xmlFuzzReadString.
 */
typedef struct {
const char *data;
size_t size;
} xmlFuzzEntityInfo;
/*
 * Single static instance for now.
 *
 * Holds the complete state of the fuzz data provider. It is set up by
 * xmlFuzzDataInit and released by xmlFuzzDataCleanup.
 */
static struct {
/* Original data, exactly as passed to xmlFuzzDataInit */
const char *data;
size_t size;
/* Remaining data: read cursor and number of bytes not yet consumed */
const char *ptr;
size_t remaining;
/* Buffer for unescaped strings, allocated with size + 1 bytes */
char *outBuf;
char *outPtr; /* Free space at end of buffer */
xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
/* The first entity is the main entity. */
const char *mainUrl;
xmlFuzzEntityInfo *mainEntity;
} fuzzData;
/**
 * xmlFuzzErrorFunc:
 * @ctx: unused
 * @msg: unused format string
 * @...: unused format arguments
 *
 * An error function that simply discards all errors. The body is
 * intentionally empty; regexp.c installs it with xmlSetGenericErrorFunc.
 */
void
xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
...) {
}
/**
 * xmlFuzzDataInit:
 * @data: raw fuzz input
 * @size: length of @data in bytes
 *
 * Set up the fuzz data provider for a new input: remember the input,
 * rewind the read cursor, allocate the buffer that receives unescaped
 * strings and create an empty entity table. Pair each call with
 * xmlFuzzDataCleanup.
 */
void
xmlFuzzDataInit(const char *data, size_t size) {
    /* No entity has been read yet. */
    fuzzData.mainEntity = NULL;
    fuzzData.mainUrl = NULL;

    /* Input and read cursor. */
    fuzzData.data = data;
    fuzzData.ptr = data;
    fuzzData.size = size;
    fuzzData.remaining = size;

    /* One extra byte of output space beyond the input size. */
    fuzzData.outBuf = xmlMalloc(size + 1);
    fuzzData.outPtr = fuzzData.outBuf;

    fuzzData.entities = xmlHashCreate(8);
}
static void
xmlFreeEntityEntry(void *value, const xmlChar *name) {
xmlFree(value);
}
/**
 * xmlFuzzDataCleanup:
 *
 * Release what xmlFuzzDataInit and xmlFuzzReadEntities allocated: the
 * entity table together with its xmlFuzzEntityInfo values, and the buffer
 * holding the unescaped strings.
 */
void
xmlFuzzDataCleanup(void) {
    xmlHashFree(fuzzData.entities, xmlFreeEntityEntry);
    xmlFree(fuzzData.outBuf);
}
/**
 * xmlFuzzReadInt:
 *
 * Read an integer from the fuzz data. The next sizeof(int) bytes are
 * copied verbatim into an int and consumed.
 *
 * Returns the integer, or 0 without consuming anything if fewer than
 * sizeof(int) bytes remain.
 */
int
xmlFuzzReadInt(void) {
    int ret;

    if (fuzzData.remaining < sizeof(int))
        return(0);

    /* memcpy avoids an unaligned read from the input buffer. */
    memcpy(&ret, fuzzData.ptr, sizeof(int));
    fuzzData.ptr += sizeof(int);
    fuzzData.remaining -= sizeof(int);

    return(ret);
}
/**
 * xmlFuzzReadString:
 * @size: receives the length of the returned string in bytes, not
 *        counting the terminator; not written when NULL is returned
 *
 * Read a random-length string from the fuzz data.
 *
 * The format is similar to libFuzzer's FuzzedDataProvider, except that a
 * backslash followed by a newline marks the end of the string, which keeps
 * the fuzz data readable. Two consecutive backslashes produce a single
 * backslash. A backslash followed by anything else, or a backslash that is
 * the last input byte, is copied literally.
 *
 * The unescaped bytes are appended to the shared output buffer.
 *
 * Returns a zero-terminated string or NULL if the fuzz data is exhausted.
 */
static const char *
xmlFuzzReadString(size_t *size) {
    const char *start = fuzzData.outPtr;
    int terminated = 0;

    while ((fuzzData.remaining > 0) && (!terminated)) {
        int cur = *fuzzData.ptr++;

        fuzzData.remaining--;

        if ((cur == '\\') && (fuzzData.remaining > 0)) {
            switch (*fuzzData.ptr) {
                case '\n':
                    /* End-of-string marker: swallow it and stop. */
                    fuzzData.ptr++;
                    fuzzData.remaining--;
                    terminated = 1;
                    continue;
                case '\\':
                    /* Escaped backslash: swallow the second one. */
                    fuzzData.ptr++;
                    fuzzData.remaining--;
                    break;
                default:
                    break;
            }
        }

        *fuzzData.outPtr++ = cur;
    }

    /* Input ran out before anything was produced. */
    if ((!terminated) && (fuzzData.outPtr == start))
        return(NULL);

    *size = fuzzData.outPtr - start;
    *fuzzData.outPtr++ = '\0';
    return(start);
}
/**
 * xmlFuzzReadEntities:
 *
 * Read entities like the main XML file, external DTDs, external parsed
 * entities from fuzz data.
 *
 * The data is consumed as alternating URL / content strings. Each URL
 * seen for the first time is registered in the entity table; repeated
 * URLs are skipped. The first registered entity becomes the main entity.
 * Reading stops when the data is exhausted or when an entry cannot be
 * allocated or stored.
 */
void
xmlFuzzReadEntities(void) {
    size_t num = 0;

    while (1) {
        const char *url, *entity;
        size_t urlSize, entitySize;
        xmlFuzzEntityInfo *entityInfo;

        url = xmlFuzzReadString(&urlSize);
        if (url == NULL) break;

        entity = xmlFuzzReadString(&entitySize);
        if (entity == NULL) break;

        if (xmlHashLookup(fuzzData.entities, (xmlChar *)url) == NULL) {
            entityInfo = xmlMalloc(sizeof(xmlFuzzEntityInfo));
            if (entityInfo == NULL)
                break;
            entityInfo->data = entity;
            entityInfo->size = entitySize;

            /* Don't leak the entry if the table rejects it. */
            if (xmlHashAddEntry(fuzzData.entities, (xmlChar *)url,
                                entityInfo) < 0) {
                xmlFree(entityInfo);
                break;
            }

            if (num == 0) {
                fuzzData.mainUrl = url;
                fuzzData.mainEntity = entityInfo;
            }

            num++;
        }
    }
}
/**
* xmlFuzzMainEntity:
* @size: size of the main entity in bytes
*
* Returns the main entity.
*/
const char *
xmlFuzzMainEntity(size_t *size) {
if (fuzzData.mainEntity == NULL)
return(NULL);
*size = fuzzData.mainEntity->size;
return(fuzzData.mainEntity->data);
}
/**
 * xmlFuzzEntityLoader:
 * @URL: URL of the entity to load
 * @ID: public identifier (unused)
 * @ctxt: parser context the new input is created for
 *
 * The entity loader for fuzz data. Looks up @URL in the table filled by
 * xmlFuzzReadEntities and wraps the stored content in an in-memory parser
 * input.
 *
 * Returns the new input, or NULL if @URL is NULL, unknown, or an
 * allocation fails.
 */
xmlParserInputPtr
xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED,
                    xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr input;
    xmlFuzzEntityInfo *entity;

    if (URL == NULL)
        return(NULL);
    entity = xmlHashLookup(fuzzData.entities, (xmlChar *) URL);
    if (entity == NULL)
        return(NULL);

    input = xmlNewInputStream(ctxt);
    if (input == NULL)
        return(NULL);
    input->filename = NULL;
    input->buf = xmlParserInputBufferCreateMem(entity->data, entity->size,
                                               XML_CHAR_ENCODING_NONE);
    if (input->buf == NULL) {
        /* Don't hand a half-initialized input to the parser. */
        xmlFreeInputStream(input);
        return(NULL);
    }
    input->base = input->cur = xmlBufContent(input->buf->buffer);
    input->end = input->base + entity->size;

    return(input);
}
/**
 * xmlFuzzExtractStrings:
 * @data: input bytes
 * @size: length of @data in bytes
 * @strings: output array with room for @numStrings pointers
 * @numStrings: maximum number of strings to extract
 *
 * Extract C strings from input data. Use exact-size allocations to detect
 * potential memory errors.
 *
 * The input is split at zero bytes. Each piece is copied into its own
 * xmlMalloc'ed, zero-terminated buffer of exactly the needed size. The
 * last extracted string runs to the end of the input if no further zero
 * byte is found. Extraction stops early if an allocation fails. Unused
 * slots of @strings are set to NULL. The caller frees the strings with
 * xmlFree.
 *
 * Returns the number of strings stored in @strings.
 */
size_t
xmlFuzzExtractStrings(const char *data, size_t size, char **strings,
                      size_t numStrings) {
    const char *start = data;
    const char *end = data + size;
    size_t i = 0, ret;

    while (i < numStrings) {
        size_t strSize = end - start;
        const char *zero = NULL;

        /* Skip memchr on an empty range; @data may be NULL when @size is 0. */
        if (strSize > 0)
            zero = memchr(start, 0, strSize);

        if (zero != NULL)
            strSize = zero - start;

        strings[i] = xmlMalloc(strSize + 1);
        if (strings[i] == NULL)
            break;
        if (strSize > 0)
            memcpy(strings[i], start, strSize);
        strings[i][strSize] = '\0';

        i++;
        if (zero != NULL)
            start = zero + 1;
        else
            break;
    }

    ret = i;

    while (i < numStrings) {
        strings[i] = NULL;
        i++;
    }

    return(ret);
}

55
fuzz/fuzz.h Normal file
View File

@ -0,0 +1,55 @@
/*
* fuzz.h: Common functions and macros for fuzzing.
*
* See Copyright for the status of this software.
*/
#ifndef __XML_FUZZERCOMMON_H__
#define __XML_FUZZERCOMMON_H__
#include <stddef.h>
#include <libxml/parser.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * libFuzzer entry points. They are defined by the individual fuzz targets
 * (regexp.c, for example), not by fuzz.c.
 */
int
LLVMFuzzerInitialize(int *argc, char ***argv);
int
LLVMFuzzerTestOneInput(const char *data, size_t size);
/* Error callback that discards every message. */
void
xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
...);
/*
 * Fuzz data provider. xmlFuzzDataInit sets up the state for one input and
 * xmlFuzzDataCleanup releases it again.
 */
void
xmlFuzzDataInit(const char *data, size_t size);
void
xmlFuzzDataCleanup(void);
/* Consume sizeof(int) bytes; returns 0 if fewer remain. */
int
xmlFuzzReadInt(void);
/* Split the remaining data into URL/content pairs and register them. */
void
xmlFuzzReadEntities(void);
/* Content and size of the first registered entity, or NULL if none. */
const char *
xmlFuzzMainEntity(size_t *size);
/* Entity loader that serves the entities registered by xmlFuzzReadEntities. */
xmlParserInputPtr
xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED,
xmlParserCtxtPtr ctxt);
/*
 * Split data at zero bytes into at most numStrings heap-allocated C
 * strings; returns the number of strings stored.
 */
size_t
xmlFuzzExtractStrings(const char *data, size_t size, char **strings,
size_t numStrings);
#ifdef __cplusplus
}
#endif
#endif /* __XML_FUZZERCOMMON_H__ */

40
fuzz/regexp.c Normal file
View File

@ -0,0 +1,40 @@
/*
* regexp.c: a libFuzzer target to test the regexp module.
*
* See Copyright for the status of this software.
*/
#include <libxml/xmlregexp.h>
#include "fuzz.h"
/**
 * LLVMFuzzerInitialize:
 * @argc: unused
 * @argv: unused
 *
 * Setup hook of the regexp fuzz target. Installs xmlFuzzErrorFunc as the
 * generic error function so that error messages are discarded.
 *
 * Returns 0.
 */
int
LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
                     char ***argv ATTRIBUTE_UNUSED) {
    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);

    return(0);
}
int
LLVMFuzzerTestOneInput(const char *data, size_t size) {
xmlRegexpPtr regexp;
char *str[2] = { NULL, NULL };
size_t numStrings;
numStrings = xmlFuzzExtractStrings(data, size, str, 2);
regexp = xmlRegexpCompile(BAD_CAST str[0]);
/* xmlRegexpExec has pathological performance in too many cases. */
#if 0
if ((regexp != NULL) && (numStrings >= 2)) {
xmlRegexpExec(regexp, BAD_CAST str[1]);
}
#endif
xmlRegFreeRegexp(regexp);
xmlFree(str[0]);
xmlFree(str[1]);
return 0;
}

16
fuzz/regexp.dict Normal file
View File

@ -0,0 +1,16 @@
quant_any="*"
quant_opt="?"
quant_some="+"
quant_num="{1,2}"
branch="|a"
pos_group="[a]"
neg_group="[^a]"
cat_letter="\\p{L}"
cat_mark="\\p{M}"
cat_number="\\p{N}"
cat_punct="\\p{P}"
cat_sym="\\p{S}"
cat_sep="\\p{Z}"
cat_other="\\p{C}"

BIN
fuzz/seed/regexp/branch-1 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/branch-10 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/branch-11 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/branch-12 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/branch-13 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/branch-2 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/branch-3 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/branch-4 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/branch-5 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/branch-6 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/branch-7 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/branch-8 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/branch-9 Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
fuzz/seed/regexp/content-1 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/content-10 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/content-2 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/content-3 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/content-4 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/content-5 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/content-6 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/content-7 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/content-8 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/content-9 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/hard-1 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/hard-10 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/hard-2 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/hard-3 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/hard-4 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/hard-5 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/hard-6 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/hard-7 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/hard-8 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/hard-9 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ncname-1 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ncname-2 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ncname-3 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ncname-4 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ncname-5 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges-1 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges-10 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges-11 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges-12 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges-2 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges-3 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges-4 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges-5 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges-6 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges-7 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges-8 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges-9 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges2-1 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges2-10 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges2-11 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges2-12 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges2-2 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges2-3 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges2-4 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges2-5 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges2-6 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges2-7 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges2-8 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/ranges2-9 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/xpath-1 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/xpath-10 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/xpath-11 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/xpath-12 Normal file

Binary file not shown.

BIN
fuzz/seed/regexp/xpath-13 Normal file

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More