1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-12-23 17:33:50 +03:00

Update fuzzing code

- Shorten timeouts
- Align options from Makefile and options files
- Add section headers to Makefile
- Skip invalid UTF-8 in regexp fuzzer
- Update regexp.dict
- Generate HTML seed corpus in correct format
This commit is contained in:
Nick Wellnhofer 2020-07-12 22:59:39 +02:00
parent 68eadabd00
commit 905820a44c
10 changed files with 233 additions and 22 deletions

2
fuzz/.gitignore vendored
View File

@ -1,8 +1,10 @@
corpus/
html
htmlSeed
regexp
schema
schemaSeed
seed/html*
seed/xml*
seed/schema*
testFuzzer

View File

@ -1,11 +1,11 @@
EXTRA_PROGRAMS = html regexp uri schema schemaSeed xml xmlSeed
EXTRA_PROGRAMS = html htmlSeed regexp uri schema schemaSeed xml xmlSeed
check_PROGRAMS = testFuzzer
CLEANFILES = $(EXTRA_PROGRAMS)
AM_CPPFLAGS = -I$(top_srcdir)/include
DEPENDENCIES = $(top_builddir)/libxml2.la
LDADD = $(STATIC_BINARIES) $(top_builddir)/libxml2.la $(THREAD_LIBS) $(Z_LIBS) $(LZMA_LIBS) $(ICONV_LIBS) $(M_LIBS) $(WIN32_EXTRA_LIBADD)
PARSER_FUZZER_MAX_LEN = 100000
XML_MAX_LEN = 80000
XML_SEED_CORPUS_SRC = \
$(top_srcdir)/test/* \
$(top_srcdir)/test/errors/*.xml \
@ -16,6 +16,14 @@ XML_SEED_CORPUS_SRC = \
$(top_srcdir)/test/VC/* \
$(top_srcdir)/test/VCM/*
testFuzzer_SOURCES = testFuzzer.c fuzz.c
tests: testFuzzer$(EXEEXT)
@echo "## Running fuzzer tests"
@./testFuzzer$(EXEEXT)
# XML fuzzer
xmlSeed_SOURCES = xmlSeed.c fuzz.c
seed/xml.stamp: xmlSeed$(EXEEXT)
@ -28,19 +36,13 @@ seed/xml.stamp: xmlSeed$(EXEEXT)
pushd $$(dirname $$i) >/dev/null; \
$(abs_builddir)/xmlSeed$(EXEEXT) $$base > $$outfile; \
popd >/dev/null; \
if [ "$$(wc -c < $$outfile)" -gt $(PARSER_FUZZER_MAX_LEN) ]; then \
if [ "$$(wc -c < $$outfile)" -gt $(XML_MAX_LEN) ]; then \
rm $$outfile; \
fi; \
fi; \
done
@touch seed/xml.stamp
testFuzzer_SOURCES = testFuzzer.c fuzz.c
tests: testFuzzer$(EXEEXT)
@echo "## Running fuzzer tests"
@./testFuzzer$(EXEEXT)
xml_SOURCES = xml.c fuzz.c
xml_LDFLAGS = -fsanitize=fuzzer
@ -48,20 +50,36 @@ fuzz-xml: xml$(EXEEXT) seed/xml.stamp
@mkdir -p corpus/xml
./xml$(EXEEXT) \
-dict=xml.dict \
-max_len=$(PARSER_FUZZER_MAX_LEN) \
-max_len=$(XML_MAX_LEN) \
-timeout=20 \
corpus/xml seed/xml
# HTML fuzzer
htmlSeed_SOURCES = htmlSeed.c fuzz.c
# Build the HTML seed corpus: run htmlSeed on every regular file under
# $(top_srcdir)/test/HTML, write each result to seed/html/<basename>, then
# touch the stamp file. Paths are quoted, and a failing htmlSeed run aborts
# the recipe so the stamp is never created over an incomplete corpus.
seed/html.stamp: htmlSeed$(EXEEXT)
	@mkdir -p seed/html
	@for i in $(top_srcdir)/test/HTML/*; do \
	    if [ -f "$$i" ]; then \
	        echo "Processing seed $$i"; \
	        ./htmlSeed$(EXEEXT) "$$i" > "seed/html/$$(basename "$$i")" || exit 1; \
	    fi; \
	done
	@touch seed/html.stamp
html_SOURCES = html.c fuzz.c
html_LDFLAGS = -fsanitize=fuzzer
fuzz-html: html$(EXEEXT)
fuzz-html: html$(EXEEXT) seed/html.stamp
@mkdir -p corpus/html
./html$(EXEEXT) \
-dict=html.dict \
-max_len=1000000 \
-timeout=20 \
corpus/html $(top_srcdir)/test/HTML
corpus/html seed/html
# Regexp fuzzer
regexp_SOURCES = regexp.c fuzz.c
regexp_LDFLAGS = -fsanitize=fuzzer
@ -70,10 +88,12 @@ fuzz-regexp: regexp$(EXEEXT)
@mkdir -p corpus/regexp
./regexp$(EXEEXT) \
-dict=regexp.dict \
-max_len=10000 \
-timeout=20 \
-max_len=200 \
-timeout=5 \
corpus/regexp $(srcdir)/seed/regexp
# URI fuzzer
uri_SOURCES = uri.c fuzz.c
uri_LDFLAGS = -fsanitize=fuzzer
@ -81,9 +101,11 @@ fuzz-uri: uri$(EXEEXT)
@mkdir -p corpus/uri
./uri$(EXEEXT) \
-max_len=10000 \
-timeout=2 \
-timeout=5 \
corpus/uri $(srcdir)/seed/uri
# XML Schema fuzzer
schemaSeed_SOURCES = schemaSeed.c fuzz.c
seed/schema.stamp: schemaSeed$(EXEEXT)
@ -107,7 +129,7 @@ fuzz-schema: schema$(EXEEXT) seed/schema.stamp
@mkdir -p corpus/schema
./schema$(EXEEXT) \
-dict=schema.dict \
-max_len=$(PARSER_FUZZER_MAX_LEN) \
-max_len=$(XML_MAX_LEN) \
-timeout=20 \
corpus/schema seed/schema

View File

@ -1,2 +1,3 @@
[libfuzzer]
max_len = 1000000
timeout = 20

36
fuzz/htmlSeed.c Normal file
View File

@ -0,0 +1,36 @@
/*
* htmlSeed.c: Generate the HTML seed corpus for fuzzing.
*
* See Copyright for the status of this software.
*/
#include <stdio.h>
#define SEED_BUF_SIZE 16384
/*
 * Write one seed to stdout: the raw bytes of the `opts` integer (zero),
 * followed by the unmodified contents of the file named by argv[1].
 *
 * Returns 0 on success, 1 on a usage error, when the input file cannot
 * be opened, or when reading it fails.
 */
int
main(int argc, char **argv) {
    int opts = 0;
    int ret = 0;
    FILE *file;
    char buf[SEED_BUF_SIZE];
    size_t size;

    if (argc != 2) {
        fprintf(stderr, "Usage: htmlSeed [FILE]\n");
        return(1);
    }

    /*
     * Open the input before emitting anything, so that a bad path does
     * not leave a header-only seed behind and NULL never reaches fread().
     */
    file = fopen(argv[1], "rb");
    if (file == NULL) {
        perror(argv[1]);
        return(1);
    }

    fwrite(&opts, sizeof(opts), 1, stdout);

    /* Copy file */
    while ((size = fread(buf, 1, SEED_BUF_SIZE, file)) > 0)
        fwrite(buf, 1, size, stdout);

    /* A short read may be EOF or an error; only the latter is a failure. */
    if (ferror(file)) {
        perror(argv[1]);
        ret = 1;
    }

    fclose(file);

    return(ret);
}

View File

@ -23,14 +23,17 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
numStrings = xmlFuzzExtractStrings(data, size, str, 2);
regexp = xmlRegexpCompile(BAD_CAST str[0]);
/* xmlRegexpExec has pathological performance in too many cases. */
/* CUR_SCHAR doesn't handle invalid UTF-8 and may cause infinite loops. */
if (xmlCheckUTF8(BAD_CAST str[0]) != 0) {
regexp = xmlRegexpCompile(BAD_CAST str[0]);
/* xmlRegexpExec has pathological performance in too many cases. */
#if 0
if ((regexp != NULL) && (numStrings >= 2)) {
xmlRegexpExec(regexp, BAD_CAST str[1]);
}
if ((regexp != NULL) && (numStrings >= 2)) {
xmlRegexpExec(regexp, BAD_CAST str[1]);
}
#endif
xmlRegFreeRegexp(regexp);
xmlRegFreeRegexp(regexp);
}
xmlFree(str[0]);
xmlFree(str[1]);

View File

@ -3,9 +3,19 @@ quant_opt="?"
quant_some="+"
quant_num="{1,2}"
dot="."
branch="|a"
parens="()"
parens_inner=")("
pos_group="[a]"
neg_group="[^a]"
class_subtraction="[a-[b]]"
esc_space="\\s"
esc_initial="\\i"
esc_name="\\c"
esc_digit="\\d"
esc_word="\\w"
cat_letter="\\p{L}"
cat_mark="\\p{M}"
@ -14,3 +24,132 @@ cat_punct="\\p{P}"
cat_sym="\\p{S}"
cat_sep="\\p{Z}"
cat_other="\\p{C}"
block_aegean_numbers="\\p{IsAegeanNumbers}"
block_alphabetic_presentation_forms="\\p{IsAlphabeticPresentationForms}"
block_arabic="\\p{IsArabic}"
block_arabic_presentation_forms_a="\\p{IsArabicPresentationFormsA}"
block_arabic_presentation_forms_b="\\p{IsArabicPresentationFormsB}"
block_armenian="\\p{IsArmenian}"
block_arrows="\\p{IsArrows}"
block_basic_latin="\\p{IsBasicLatin}"
block_bengali="\\p{IsBengali}"
block_block_elements="\\p{IsBlockElements}"
block_bopomofo="\\p{IsBopomofo}"
block_bopomofo_extended="\\p{IsBopomofoExtended}"
block_box_drawing="\\p{IsBoxDrawing}"
block_braille_patterns="\\p{IsBraillePatterns}"
block_buhid="\\p{IsBuhid}"
block_byzantine_musical_symbols="\\p{IsByzantineMusicalSymbols}"
block_c_j_k_compatibility="\\p{IsCJKCompatibility}"
block_c_j_k_compatibility_forms="\\p{IsCJKCompatibilityForms}"
block_c_j_k_compatibility_ideographs="\\p{IsCJKCompatibilityIdeographs}"
block_c_j_k_compatibility_ideographs_supplement="\\p{IsCJKCompatibilityIdeographsSupplement}"
block_c_j_k_radicals_supplement="\\p{IsCJKRadicalsSupplement}"
block_c_j_k_symbolsand_punctuation="\\p{IsCJKSymbolsandPunctuation}"
block_c_j_k_unified_ideographs="\\p{IsCJKUnifiedIdeographs}"
block_c_j_k_unified_ideographs_extension_a="\\p{IsCJKUnifiedIdeographsExtensionA}"
block_cjk_unified_ideographs_extension_b="\\p{IsCJKUnifiedIdeographsExtensionB}"
block_cherokee="\\p{IsCherokee}"
block_combining_diacritical_marks="\\p{IsCombiningDiacriticalMarks}"
block_combining_diacritical_marksfor_symbols="\\p{IsCombiningDiacriticalMarksforSymbols}"
block_combining_half_marks="\\p{IsCombiningHalfMarks}"
block_combining_marksfor_symbols="\\p{IsCombiningMarksforSymbols}"
block_control_pictures="\\p{IsControlPictures}"
block_currency_symbols="\\p{IsCurrencySymbols}"
block_cypriot_syllabary="\\p{IsCypriotSyllabary}"
block_cyrillic="\\p{IsCyrillic}"
block_cyrillic_supplement="\\p{IsCyrillicSupplement}"
block_deseret="\\p{IsDeseret}"
block_devanagari="\\p{IsDevanagari}"
block_dingbats="\\p{IsDingbats}"
block_enclosed_alphanumerics="\\p{IsEnclosedAlphanumerics}"
block_enclosed_cjk_lettersand_months="\\p{IsEnclosedCJKLettersandMonths}"
block_ethiopic="\\p{IsEthiopic}"
block_general_punctuation="\\p{IsGeneralPunctuation}"
block_geometric_shapes="\\p{IsGeometricShapes}"
block_georgian="\\p{IsGeorgian}"
block_gothic="\\p{IsGothic}"
block_greek="\\p{IsGreek}"
block_greek_extended="\\p{IsGreekExtended}"
block_greekand_coptic="\\p{IsGreekandCoptic}"
block_gujarati="\\p{IsGujarati}"
block_gurmukhi="\\p{IsGurmukhi}"
block_halfwidthand_fullwidth_forms="\\p{IsHalfwidthandFullwidthForms}"
block_hangul_compatibility_jamo="\\p{IsHangulCompatibilityJamo}"
block_hangul_jamo="\\p{IsHangulJamo}"
block_hangul_syllables="\\p{IsHangulSyllables}"
block_hanunoo="\\p{IsHanunoo}"
block_hebrew="\\p{IsHebrew}"
block_high_private_use_surrogates="\\p{IsHighPrivateUseSurrogates}"
block_high_surrogates="\\p{IsHighSurrogates}"
block_hiragana="\\p{IsHiragana}"
block_ipa_extensions="\\p{IsIPAExtensions}"
block_ideographic_description_characters="\\p{IsIdeographicDescriptionCharacters}"
block_kanbun="\\p{IsKanbun}"
block_kangxi_radicals="\\p{IsKangxiRadicals}"
block_kannada="\\p{IsKannada}"
block_katakana="\\p{IsKatakana}"
block_katakana_phonetic_extensions="\\p{IsKatakanaPhoneticExtensions}"
block_khmer="\\p{IsKhmer}"
block_khmer_symbols="\\p{IsKhmerSymbols}"
block_lao="\\p{IsLao}"
block_latin1Supplement="\\p{IsLatin1Supplement}"
block_latin_extended_a="\\p{IsLatinExtendedA}"
block_latin_extended_b="\\p{IsLatinExtendedB}"
block_latin_extended_additional="\\p{IsLatinExtendedAdditional}"
block_letterlike_symbols="\\p{IsLetterlikeSymbols}"
block_limbu="\\p{IsLimbu}"
block_linear_b_ideograms="\\p{IsLinearBIdeograms}"
block_linear_b_syllabary="\\p{IsLinearBSyllabary}"
block_low_surrogates="\\p{IsLowSurrogates}"
block_malayalam="\\p{IsMalayalam}"
block_mathematical_alphanumeric_symbols="\\p{IsMathematicalAlphanumericSymbols}"
block_mathematical_operators="\\p{IsMathematicalOperators}"
block_miscellaneous_mathematical_symbols_a="\\p{IsMiscellaneousMathematicalSymbolsA}"
block_miscellaneous_mathematical_symbols_b="\\p{IsMiscellaneousMathematicalSymbolsB}"
block_miscellaneous_symbols="\\p{IsMiscellaneousSymbols}"
block_miscellaneous_symbolsand_arrows="\\p{IsMiscellaneousSymbolsandArrows}"
block_miscellaneous_technical="\\p{IsMiscellaneousTechnical}"
block_mongolian="\\p{IsMongolian}"
block_musical_symbols="\\p{IsMusicalSymbols}"
block_myanmar="\\p{IsMyanmar}"
block_number_forms="\\p{IsNumberForms}"
block_ogham="\\p{IsOgham}"
block_old_italic="\\p{IsOldItalic}"
block_optical_character_recognition="\\p{IsOpticalCharacterRecognition}"
block_oriya="\\p{IsOriya}"
block_osmanya="\\p{IsOsmanya}"
block_phonetic_extensions="\\p{IsPhoneticExtensions}"
block_private_use="\\p{IsPrivateUse}"
block_private_use_area="\\p{IsPrivateUseArea}"
block_runic="\\p{IsRunic}"
block_shavian="\\p{IsShavian}"
block_sinhala="\\p{IsSinhala}"
block_small_form_variants="\\p{IsSmallFormVariants}"
block_spacing_modifier_letters="\\p{IsSpacingModifierLetters}"
block_specials="\\p{IsSpecials}"
block_superscriptsand_subscripts="\\p{IsSuperscriptsandSubscripts}"
block_supplemental_arrows_a="\\p{IsSupplementalArrowsA}"
block_supplemental_arrows_b="\\p{IsSupplementalArrowsB}"
block_supplemental_mathematical_operators="\\p{IsSupplementalMathematicalOperators}"
block_supplementary_private_use_area_a="\\p{IsSupplementaryPrivateUseAreaA}"
block_supplementary_private_use_area_b="\\p{IsSupplementaryPrivateUseAreaB}"
block_syriac="\\p{IsSyriac}"
block_tagalog="\\p{IsTagalog}"
block_tagbanwa="\\p{IsTagbanwa}"
block_tags="\\p{IsTags}"
block_tai_le="\\p{IsTaiLe}"
block_tai_xuan_jing_symbols="\\p{IsTaiXuanJingSymbols}"
block_tamil="\\p{IsTamil}"
block_telugu="\\p{IsTelugu}"
block_thaana="\\p{IsThaana}"
block_thai="\\p{IsThai}"
block_tibetan="\\p{IsTibetan}"
block_ugaritic="\\p{IsUgaritic}"
block_unified_canadian_aboriginal_syllabics="\\p{IsUnifiedCanadianAboriginalSyllabics}"
block_variation_selectors="\\p{IsVariationSelectors}"
block_variation_selectors_supplement="\\p{IsVariationSelectorsSupplement}"
block_yi_radicals="\\p{IsYiRadicals}"
block_yi_syllables="\\p{IsYiSyllables}"
block_yijing_hexagram_symbols="\\p{IsYijingHexagramSymbols}"

3
fuzz/regexp.options Normal file
View File

@ -0,0 +1,3 @@
[libfuzzer]
max_len = 200
timeout = 5

View File

@ -1,2 +1,3 @@
[libfuzzer]
max_len = 80000
timeout = 20

3
fuzz/uri.options Normal file
View File

@ -0,0 +1,3 @@
[libfuzzer]
max_len = 10000
timeout = 5

View File

@ -1,2 +1,3 @@
[libfuzzer]
max_len = 80000
timeout = 20