1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-03-21 22:50:08 +03:00

Fuzz target for HTML parser

This commit is contained in:
Nick Wellnhofer 2020-06-15 12:20:54 +02:00
parent a697ed1e24
commit 536f421d37
7 changed files with 212 additions and 1 deletions

1
fuzz/.gitignore vendored
View File

@ -1,4 +1,5 @@
corpus/
html
regexp
seed/xml*
testFuzzer

View File

@ -1,4 +1,4 @@
EXTRA_PROGRAMS = regexp uri xml xmlSeed
EXTRA_PROGRAMS = html regexp uri xml xmlSeed
check_PROGRAMS = testFuzzer
CLEANFILES = $(EXTRA_PROGRAMS)
AM_CPPFLAGS = -I$(top_srcdir)/include
@ -52,6 +52,17 @@ fuzz-xml: xml$(EXEEXT) seed/xml.stamp
-timeout=20 \
corpus/xml seed/xml
# HTML parser fuzz target: built from html.c plus the shared fuzz.c helpers
# and linked with -fsanitize=fuzzer.
html_SOURCES = html.c fuzz.c
html_LDFLAGS = -fsanitize=fuzzer
# Run the HTML fuzzer. corpus/html is created on demand; html.dict is passed
# as the dictionary, and both corpus/html and $(top_srcdir)/test/HTML are
# handed to the fuzzer as input directories.
fuzz-html: html$(EXEEXT)
@mkdir -p corpus/html
./html$(EXEEXT) \
-dict=html.dict \
-max_len=1000000 \
-timeout=20 \
corpus/html $(top_srcdir)/test/HTML
regexp_SOURCES = regexp.c fuzz.c
regexp_LDFLAGS = -fsanitize=fuzzer

View File

@ -104,6 +104,23 @@ xmlFuzzReadInt() {
return ret;
}
/**
 * xmlFuzzReadRemaining:
 * @size: receives the number of bytes handed out
 *
 * Hand out all fuzz data that has not been read yet and mark the
 * data as fully consumed.
 *
 * Returns a pointer to the first unread byte of the fuzz data.
 */
const char *
xmlFuzzReadRemaining(size_t *size) {
    const char *start = fuzzData.ptr;
    size_t avail = fuzzData.remaining;

    /* Report the length, then advance the cursor past everything. */
    *size = avail;
    fuzzData.ptr += avail;
    fuzzData.remaining = 0;

    return(start);
}
/**
* xmlFuzzReadString:
* @size: size of string in bytes

View File

@ -33,6 +33,9 @@ xmlFuzzDataCleanup(void);
int
xmlFuzzReadInt(void);
/*
 * Returns a pointer to the unread part of the fuzz data, stores its
 * length in *size and marks the data as consumed.
 */
const char *
xmlFuzzReadRemaining(size_t *size);
void
xmlFuzzReadEntities(void);

70
fuzz/html.c Normal file
View File

@ -0,0 +1,70 @@
/*
* html.c: a libFuzzer target to test several HTML parser interfaces.
*
* See Copyright for the status of this software.
*/
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include "fuzz.h"
/**
 * LLVMFuzzerInitialize:
 * @argc: unused
 * @argv: unused
 *
 * libFuzzer initialization hook: initializes the libxml2 parser and
 * installs xmlFuzzErrorFunc as the generic error handler.
 *
 * Returns 0.
 */
int
LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
                     char ***argv ATTRIBUTE_UNUSED) {
    xmlInitParser();
    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);

    return(0);
}
/**
 * LLVMFuzzerTestOneInput:
 * @data: fuzz input
 * @size: size of the fuzz input in bytes
 *
 * libFuzzer entry point. An int read from the start of the input is
 * used as the parser options; the rest of the input is the document.
 * The document is parsed once with htmlReadMemory and serialized with
 * htmlDocDumpMemory, then parsed again through the push interface in
 * chunks of at most 128 bytes.
 *
 * Returns 0 in all cases.
 */
int
LLVMFuzzerTestOneInput(const char *data, size_t size) {
    static const size_t maxChunkSize = 128;
    htmlDocPtr doc;
    htmlParserCtxtPtr ctxt;
    xmlChar *out = NULL;
    const char *docBuffer;
    size_t docSize, consumed, chunkSize;
    int opts, outSize = 0;

    xmlFuzzDataInit(data, size);
    opts = xmlFuzzReadInt();

    docBuffer = xmlFuzzReadRemaining(&docSize);
    if (docBuffer == NULL) {
        xmlFuzzDataCleanup();
        return(0);
    }

    /* Pull parser */

    doc = htmlReadMemory(docBuffer, (int) docSize, NULL, NULL, opts);

    /* Also test the serializer. */
    htmlDocDumpMemory(doc, &out, &outSize);
    xmlFree(out);
    xmlFreeDoc(doc);

    /* Push parser */

    ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
                                    XML_CHAR_ENCODING_NONE);

    /*
     * Context creation can fail and return NULL; skip the push test
     * in that case instead of dereferencing ctxt->myDoc.
     */
    if (ctxt != NULL) {
        htmlCtxtUseOptions(ctxt, opts);

        for (consumed = 0; consumed < docSize; consumed += chunkSize) {
            chunkSize = docSize - consumed;
            if (chunkSize > maxChunkSize) {
                chunkSize = maxChunkSize;
            }
            htmlParseChunk(ctxt, docBuffer + consumed, (int) chunkSize, 0);
        }

        /* Terminate the push parse. */
        htmlParseChunk(ctxt, NULL, 0, 1);

        /* The document is freed separately, before the context. */
        xmlFreeDoc(ctxt->myDoc);
        htmlFreeParserCtxt(ctxt);
    }

    /* Cleanup */

    xmlFuzzDataCleanup();

    return(0);
}

107
fuzz/html.dict Normal file
View File

@ -0,0 +1,107 @@
elem_a="<a></a>"
elem_abbr="<abbr></abbr>"
elem_acronym="<acronym></acronym>"
elem_address="<address></address>"
elem_applet="<applet></applet>"
elem_area="<area>"
elem_b="<b></b>"
elem_base="<base>"
elem_basefont="<basefont>"
elem_bdo="<bdo></bdo>"
elem_big="<big></big>"
elem_blockquote="<blockquote></blockquote>"
elem_body="<body></body>"
elem_br="<br>"
elem_button="<button></button>"
elem_caption="<caption></caption>"
elem_center="<center></center>"
elem_cite="<cite></cite>"
elem_code="<code></code>"
elem_col="<col>"
elem_colgroup="<colgroup></colgroup>"
elem_dd="<dd></dd>"
elem_del="<del></del>"
elem_dfn="<dfn></dfn>"
elem_dir="<dir></dir>"
elem_div="<div></div>"
elem_dl="<dl></dl>"
elem_dt="<dt></dt>"
elem_em="<em></em>"
elem_embed="<embed></embed>"
elem_fieldset="<fieldset></fieldset>"
elem_font="<font></font>"
elem_form="<form></form>"
elem_frame="<frame>"
elem_frameset="<frameset></frameset>"
elem_h1="<h1></h1>"
elem_h2="<h2></h2>"
elem_h3="<h3></h3>"
elem_h4="<h4></h4>"
elem_h5="<h5></h5>"
elem_h6="<h6></h6>"
elem_head="<head></head>"
elem_hr="<hr>"
elem_html="<html></html>"
elem_i="<i></i>"
elem_iframe="<iframe></iframe>"
elem_img="<img>"
elem_input="<input>"
elem_ins="<ins></ins>"
elem_isindex="<isindex>"
elem_kbd="<kbd></kbd>"
elem_label="<label></label>"
elem_legend="<legend></legend>"
elem_li="<li></li>"
elem_link="<link>"
elem_map="<map></map>"
elem_menu="<menu></menu>"
elem_meta="<meta>"
elem_noframes="<noframes></noframes>"
elem_noscript="<noscript></noscript>"
elem_object="<object></object>"
elem_ol="<ol></ol>"
elem_optgroup="<optgroup></optgroup>"
elem_option="<option></option>"
elem_p="<p></p>"
elem_param="<param>"
elem_pre="<pre></pre>"
elem_q="<q></q>"
elem_s="<s></s>"
elem_samp="<samp></samp>"
elem_script="<script></script>"
elem_select="<select></select>"
elem_small="<small></small>"
elem_span="<span></span>"
elem_strike="<strike></strike>"
elem_strong="<strong></strong>"
elem_style="<style></style>"
elem_sub="<sub></sub>"
elem_sup="<sup></sup>"
elem_table="<table></table>"
elem_tbody="<tbody></tbody>"
elem_td="<td></td>"
elem_textarea="<textarea></textarea>"
elem_tfoot="<tfoot></tfoot>"
elem_th="<th></th>"
elem_thead="<thead></thead>"
elem_title="<title></title>"
elem_tr="<tr></tr>"
elem_tt="<tt></tt>"
elem_u="<u></u>"
elem_ul="<ul></ul>"
elem_var="<var></var>"
attr_id=" id=\"\""
attr_style=" style=\"\""
comment="<!-- -->"
pi="<?a?>"
ref_lt="&lt;"
ref_gt="&gt;"
ref_amp="&amp;"
ref_apos="&apos;"
ref_quot="&quot;"
ref_dec="&#9;"
ref_hex="&#xA;"

2
fuzz/html.options Normal file
View File

@ -0,0 +1,2 @@
[libfuzzer]
max_len = 1000000