diff --git a/fuzz/.gitignore b/fuzz/.gitignore
index 28b71084..178a6592 100644
--- a/fuzz/.gitignore
+++ b/fuzz/.gitignore
@@ -1,4 +1,5 @@
 corpus/
+html
 regexp
 seed/xml*
 testFuzzer
diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am
index 0e7391ba..a286f867 100644
--- a/fuzz/Makefile.am
+++ b/fuzz/Makefile.am
@@ -1,4 +1,4 @@
-EXTRA_PROGRAMS = regexp uri xml xmlSeed
+EXTRA_PROGRAMS = html regexp uri xml xmlSeed
 check_PROGRAMS = testFuzzer
 CLEANFILES = $(EXTRA_PROGRAMS)
 AM_CPPFLAGS = -I$(top_srcdir)/include
@@ -52,6 +52,18 @@ fuzz-xml: xml$(EXEEXT) seed/xml.stamp
 	    -timeout=20 \
 	    corpus/xml seed/xml
 
+html_SOURCES = html.c fuzz.c
+html_LDFLAGS = -fsanitize=fuzzer
+
+# Run the HTML fuzzer; test/HTML is passed as an additional input directory.
+fuzz-html: html$(EXEEXT)
+	@mkdir -p corpus/html
+	./html$(EXEEXT) \
+	    -dict=html.dict \
+	    -max_len=1000000 \
+	    -timeout=20 \
+	    corpus/html $(top_srcdir)/test/HTML
+
 regexp_SOURCES = regexp.c fuzz.c
 regexp_LDFLAGS = -fsanitize=fuzzer
 
diff --git a/fuzz/fuzz.c b/fuzz/fuzz.c
index 6955f280..57770c53 100644
--- a/fuzz/fuzz.c
+++ b/fuzz/fuzz.c
@@ -104,6 +104,27 @@ xmlFuzzReadInt() {
     return ret;
 }
 
+/**
+ * xmlFuzzReadRemaining:
+ * @size: receives the number of bytes returned
+ *
+ * Read remaining bytes from fuzz data. All unread bytes are consumed,
+ * so nothing is left for subsequent reads.
+ *
+ * Returns a pointer to the first unread byte. The pointer refers to
+ * the buffer tracked by fuzzData; no copy is made.
+ */
+const char *
+xmlFuzzReadRemaining(size_t *size) {
+    const char *ret = fuzzData.ptr;
+
+    *size = fuzzData.remaining;
+    fuzzData.ptr += fuzzData.remaining;
+    fuzzData.remaining = 0;
+
+    return(ret);
+}
+
 /**
  * xmlFuzzReadString:
  * @size: size of string in bytes
diff --git a/fuzz/fuzz.h b/fuzz/fuzz.h
index 1093be14..35702855 100644
--- a/fuzz/fuzz.h
+++ b/fuzz/fuzz.h
@@ -33,6 +33,9 @@ xmlFuzzDataCleanup(void);
 int
 xmlFuzzReadInt(void);
 
+const char *
+xmlFuzzReadRemaining(size_t *size);
+
 void
 xmlFuzzReadEntities(void);
 
diff --git a/fuzz/html.c b/fuzz/html.c
new file mode 100644
index 00000000..d212c1f0
--- /dev/null
+++ b/fuzz/html.c
@@ -0,0 +1,101 @@
+/*
+ * html.c: a libFuzzer target to test several HTML parser interfaces.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <libxml/HTMLparser.h>
+#include <libxml/HTMLtree.h>
+#include "fuzz.h"
+
+/**
+ * LLVMFuzzerInitialize:
+ * @argc: unused
+ * @argv: unused
+ *
+ * libFuzzer startup hook. Initializes the parser library and installs
+ * xmlFuzzErrorFunc as the generic error handler.
+ *
+ * Returns 0.
+ */
+int
+LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
+                     char ***argv ATTRIBUTE_UNUSED) {
+    xmlInitParser();
+    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
+
+    return 0;
+}
+
+/**
+ * LLVMFuzzerTestOneInput:
+ * @data: fuzz input
+ * @size: size of fuzz input in bytes
+ *
+ * Feed one fuzz input to the HTML parser. The first integer read from
+ * the input supplies the parser options, the rest is the document.
+ * The document is parsed with the pull parser and serialized, then
+ * parsed again with the push parser in chunks of at most 128 bytes.
+ *
+ * Returns 0.
+ */
+int
+LLVMFuzzerTestOneInput(const char *data, size_t size) {
+    static const size_t maxChunkSize = 128;
+    htmlDocPtr doc;
+    htmlParserCtxtPtr ctxt;
+    const char *docBuffer;
+    size_t docSize, consumed, chunkSize;
+    int opts;
+
+    xmlFuzzDataInit(data, size);
+    opts = xmlFuzzReadInt();
+
+    docBuffer = xmlFuzzReadRemaining(&docSize);
+    if (docBuffer == NULL) {
+        xmlFuzzDataCleanup();
+        return(0);
+    }
+
+    /* Pull parser */
+
+    /* Inputs are capped by max_len (1000000), so the size fits in int. */
+    doc = htmlReadMemory(docBuffer, (int) docSize, NULL, NULL, opts);
+    if (doc != NULL) {
+        xmlChar *out = NULL;
+        int outSize = 0;
+
+        /* Also test the serializer. */
+        htmlDocDumpMemory(doc, &out, &outSize);
+        xmlFree(out);
+        xmlFreeDoc(doc);
+    }
+
+    /* Push parser */
+
+    ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
+                                    XML_CHAR_ENCODING_NONE);
+    /* Context creation can fail; ctxt->myDoc must not be read then. */
+    if (ctxt != NULL) {
+        htmlCtxtUseOptions(ctxt, opts);
+
+        for (consumed = 0; consumed < docSize; consumed += chunkSize) {
+            chunkSize = docSize - consumed;
+            if (chunkSize > maxChunkSize)
+                chunkSize = maxChunkSize;
+            htmlParseChunk(ctxt, docBuffer + consumed, (int) chunkSize, 0);
+        }
+
+        /* Final call with terminate set ends the push parse. */
+        htmlParseChunk(ctxt, NULL, 0, 1);
+        xmlFreeDoc(ctxt->myDoc);
+        htmlFreeParserCtxt(ctxt);
+    }
+
+    /* Cleanup */
+
+    xmlFuzzDataCleanup();
+
+    return(0);
+}
+
diff --git a/fuzz/html.dict b/fuzz/html.dict
new file mode 100644
index 00000000..9f58ed1e
--- /dev/null
+++ b/fuzz/html.dict
@@ -0,0 +1,107 @@
+elem_a=""
+elem_abbr=""
+elem_acronym=""
+elem_address="
" +elem_applet="" +elem_area="" +elem_b="" +elem_base="" +elem_basefont="" +elem_bdo="" +elem_big="" +elem_blockquote="
" +elem_body="" +elem_br="
" +elem_button="" +elem_caption="" +elem_center="
" +elem_cite="" +elem_code="" +elem_col="" +elem_colgroup="" +elem_dd="
" +elem_del="" +elem_dfn="" +elem_dir="" +elem_div="
" +elem_dl="
" +elem_dt="
" +elem_em="" +elem_embed="" +elem_fieldset="
" +elem_font="" +elem_form="
" +elem_frame="" +elem_frameset="" +elem_h1="

" +elem_h2="

" +elem_h3="

" +elem_h4="

" +elem_h5="
" +elem_h6="
" +elem_head="" +elem_hr="
" +elem_html="" +elem_i="" +elem_iframe="" +elem_img="" +elem_input="" +elem_ins="" +elem_isindex="" +elem_kbd="" +elem_label="" +elem_legend="" +elem_li="
  • " +elem_link="" +elem_map="" +elem_menu="" +elem_meta="" +elem_noframes="" +elem_noscript="" +elem_object="" +elem_ol="
      " +elem_optgroup="" +elem_option="" +elem_p="

      " +elem_param="" +elem_pre="
      "
      +elem_q=""
      +elem_s=""
      +elem_samp=""
      +elem_script=""
      +elem_select=""
      +elem_small=""
      +elem_span=""
      +elem_strike=""
      +elem_strong=""
      +elem_style=""
      +elem_sub=""
      +elem_sup=""
      +elem_table="
      " +elem_tbody="" +elem_td="" +elem_textarea="" +elem_tfoot="" +elem_th="" +elem_thead="" +elem_title="" +elem_tr="" +elem_tt="" +elem_u="" +elem_ul="
        " +elem_var="" + +attr_id=" id=\"\"" +attr_style=" style=\"\"" + +comment="" + +pi="" + +ref_lt="<" +ref_gt=">" +ref_amp="&" +ref_apos="'" +ref_quot=""" +ref_dec=" " +ref_hex=" " diff --git a/fuzz/html.options b/fuzz/html.options new file mode 100644 index 00000000..e5ae71b9 --- /dev/null +++ b/fuzz/html.options @@ -0,0 +1,2 @@ +[libfuzzer] +max_len = 1000000