mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2024-12-23 17:33:50 +03:00
Update fuzzing code
- Shorten timeouts
- Align options from Makefile and options files
- Add section headers to Makefile
- Skip invalid UTF-8 in regexp fuzzer
- Update regexp.dict
- Generate HTML seed corpus in correct format
This commit is contained in:
parent
68eadabd00
commit
905820a44c
2
fuzz/.gitignore
vendored
2
fuzz/.gitignore
vendored
@ -1,8 +1,10 @@
|
||||
corpus/
|
||||
html
|
||||
htmlSeed
|
||||
regexp
|
||||
schema
|
||||
schemaSeed
|
||||
seed/html*
|
||||
seed/xml*
|
||||
seed/schema*
|
||||
testFuzzer
|
||||
|
@ -1,11 +1,11 @@
|
||||
EXTRA_PROGRAMS = html regexp uri schema schemaSeed xml xmlSeed
|
||||
EXTRA_PROGRAMS = html htmlSeed regexp uri schema schemaSeed xml xmlSeed
|
||||
check_PROGRAMS = testFuzzer
|
||||
CLEANFILES = $(EXTRA_PROGRAMS)
|
||||
AM_CPPFLAGS = -I$(top_srcdir)/include
|
||||
DEPENDENCIES = $(top_builddir)/libxml2.la
|
||||
LDADD = $(STATIC_BINARIES) $(top_builddir)/libxml2.la $(THREAD_LIBS) $(Z_LIBS) $(LZMA_LIBS) $(ICONV_LIBS) $(M_LIBS) $(WIN32_EXTRA_LIBADD)
|
||||
|
||||
PARSER_FUZZER_MAX_LEN = 100000
|
||||
XML_MAX_LEN = 80000
|
||||
XML_SEED_CORPUS_SRC = \
|
||||
$(top_srcdir)/test/* \
|
||||
$(top_srcdir)/test/errors/*.xml \
|
||||
@ -16,6 +16,14 @@ XML_SEED_CORPUS_SRC = \
|
||||
$(top_srcdir)/test/VC/* \
|
||||
$(top_srcdir)/test/VCM/*
|
||||
|
||||
testFuzzer_SOURCES = testFuzzer.c fuzz.c
|
||||
|
||||
tests: testFuzzer$(EXEEXT)
|
||||
@echo "## Running fuzzer tests"
|
||||
@./testFuzzer$(EXEEXT)
|
||||
|
||||
# XML fuzzer
|
||||
|
||||
xmlSeed_SOURCES = xmlSeed.c fuzz.c
|
||||
|
||||
seed/xml.stamp: xmlSeed$(EXEEXT)
|
||||
@ -28,19 +36,13 @@ seed/xml.stamp: xmlSeed$(EXEEXT)
|
||||
pushd $$(dirname $$i) >/dev/null; \
|
||||
$(abs_builddir)/xmlSeed$(EXEEXT) $$base > $$outfile; \
|
||||
popd >/dev/null; \
|
||||
if [ "$$(wc -c < $$outfile)" -gt $(PARSER_FUZZER_MAX_LEN) ]; then \
|
||||
if [ "$$(wc -c < $$outfile)" -gt $(XML_MAX_LEN) ]; then \
|
||||
rm $$outfile; \
|
||||
fi; \
|
||||
fi; \
|
||||
done
|
||||
@touch seed/xml.stamp
|
||||
|
||||
testFuzzer_SOURCES = testFuzzer.c fuzz.c
|
||||
|
||||
tests: testFuzzer$(EXEEXT)
|
||||
@echo "## Running fuzzer tests"
|
||||
@./testFuzzer$(EXEEXT)
|
||||
|
||||
xml_SOURCES = xml.c fuzz.c
|
||||
xml_LDFLAGS = -fsanitize=fuzzer
|
||||
|
||||
@ -48,20 +50,36 @@ fuzz-xml: xml$(EXEEXT) seed/xml.stamp
|
||||
@mkdir -p corpus/xml
|
||||
./xml$(EXEEXT) \
|
||||
-dict=xml.dict \
|
||||
-max_len=$(PARSER_FUZZER_MAX_LEN) \
|
||||
-max_len=$(XML_MAX_LEN) \
|
||||
-timeout=20 \
|
||||
corpus/xml seed/xml
|
||||
|
||||
# HTML fuzzer
|
||||
|
||||
htmlSeed_SOURCES = htmlSeed.c fuzz.c
|
||||
|
||||
# Build the HTML seed corpus: every regular file under test/HTML is run
# through htmlSeed and the output is stored in seed/html/ under the same
# base name. The stamp file marks the corpus as generated; it is only
# touched when every seed was produced successfully, because the loop
# aborts the recipe on the first htmlSeed failure.
seed/html.stamp: htmlSeed$(EXEEXT)
	@mkdir -p seed/html
	@for i in $(top_srcdir)/test/HTML/*; do \
	    if [ -f "$$i" ]; then \
	        echo "Processing seed $$i"; \
	        ./htmlSeed$(EXEEXT) "$$i" > "seed/html/$$(basename "$$i")" \
	            || exit 1; \
	    fi; \
	done
	@touch seed/html.stamp
|
||||
|
||||
html_SOURCES = html.c fuzz.c
|
||||
html_LDFLAGS = -fsanitize=fuzzer
|
||||
|
||||
fuzz-html: html$(EXEEXT)
|
||||
fuzz-html: html$(EXEEXT) seed/html.stamp
|
||||
@mkdir -p corpus/html
|
||||
./html$(EXEEXT) \
|
||||
-dict=html.dict \
|
||||
-max_len=1000000 \
|
||||
-timeout=20 \
|
||||
corpus/html $(top_srcdir)/test/HTML
|
||||
corpus/html seed/html
|
||||
|
||||
# Regexp fuzzer
|
||||
|
||||
regexp_SOURCES = regexp.c fuzz.c
|
||||
regexp_LDFLAGS = -fsanitize=fuzzer
|
||||
@ -70,10 +88,12 @@ fuzz-regexp: regexp$(EXEEXT)
|
||||
@mkdir -p corpus/regexp
|
||||
./regexp$(EXEEXT) \
|
||||
-dict=regexp.dict \
|
||||
-max_len=10000 \
|
||||
-timeout=20 \
|
||||
-max_len=200 \
|
||||
-timeout=5 \
|
||||
corpus/regexp $(srcdir)/seed/regexp
|
||||
|
||||
# URI fuzzer
|
||||
|
||||
uri_SOURCES = uri.c fuzz.c
|
||||
uri_LDFLAGS = -fsanitize=fuzzer
|
||||
|
||||
@ -81,9 +101,11 @@ fuzz-uri: uri$(EXEEXT)
|
||||
@mkdir -p corpus/uri
|
||||
./uri$(EXEEXT) \
|
||||
-max_len=10000 \
|
||||
-timeout=2 \
|
||||
-timeout=5 \
|
||||
corpus/uri $(srcdir)/seed/uri
|
||||
|
||||
# XML Schema fuzzer
|
||||
|
||||
schemaSeed_SOURCES = schemaSeed.c fuzz.c
|
||||
|
||||
seed/schema.stamp: schemaSeed$(EXEEXT)
|
||||
@ -107,7 +129,7 @@ fuzz-schema: schema$(EXEEXT) seed/schema.stamp
|
||||
@mkdir -p corpus/schema
|
||||
./schema$(EXEEXT) \
|
||||
-dict=schema.dict \
|
||||
-max_len=$(PARSER_FUZZER_MAX_LEN) \
|
||||
-max_len=$(XML_MAX_LEN) \
|
||||
-timeout=20 \
|
||||
corpus/schema seed/schema
|
||||
|
||||
|
@ -1,2 +1,3 @@
|
||||
[libfuzzer]
|
||||
max_len = 1000000
|
||||
timeout = 20
|
||||
|
36
fuzz/htmlSeed.c
Normal file
36
fuzz/htmlSeed.c
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* htmlSeed.c: Generate the HTML seed corpus for fuzzing.
|
||||
*
|
||||
* See Copyright for the status of this software.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/* Size of the chunk buffer used when copying the input file to stdout. */
#define SEED_BUF_SIZE 16384

/*
 * main:
 * @argc: number of command line arguments, must be 2
 * @argv: argv[1] is the path of the file to convert
 *
 * Writes a seed to stdout: first the raw bytes of an int set to 0 (the
 * options word), then the unmodified contents of the input file.
 *
 * Returns 0 on success, 1 on a usage error, if the input file can't be
 * opened or read, or if writing to stdout fails.
 */
int
main(int argc, char **argv) {
    int opts = 0;
    int ret = 0;
    FILE *file;
    char buf[SEED_BUF_SIZE];
    size_t size;

    if (argc != 2) {
        fprintf(stderr, "Usage: htmlSeed [FILE]\n");
        return(1);
    }

    /* Open the input first so nothing is emitted for an unreadable file. */
    file = fopen(argv[1], "rb");
    if (file == NULL) {
        perror(argv[1]);
        return(1);
    }

    if (fwrite(&opts, sizeof(opts), 1, stdout) != 1)
        ret = 1;

    /*
     * Copy file. Loop until fread reports no more data instead of stopping
     * at the first short read, then use ferror to tell EOF from failure.
     */
    while (ret == 0) {
        size = fread(buf, 1, SEED_BUF_SIZE, file);
        if (size == 0)
            break;
        if (fwrite(buf, 1, size, stdout) != size)
            ret = 1;
    }
    if (ferror(file))
        ret = 1;
    fclose(file);

    /* Surface buffered write errors (e.g. disk full) in the exit status. */
    if (fflush(stdout) != 0)
        ret = 1;

    return(ret);
}
|
||||
|
@ -23,14 +23,17 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
|
||||
|
||||
numStrings = xmlFuzzExtractStrings(data, size, str, 2);
|
||||
|
||||
regexp = xmlRegexpCompile(BAD_CAST str[0]);
|
||||
/* xmlRegexpExec has pathological performance in too many cases. */
|
||||
/* CUR_SCHAR doesn't handle invalid UTF-8 and may cause infinite loops. */
|
||||
if (xmlCheckUTF8(BAD_CAST str[0]) != 0) {
|
||||
regexp = xmlRegexpCompile(BAD_CAST str[0]);
|
||||
/* xmlRegexpExec has pathological performance in too many cases. */
|
||||
#if 0
|
||||
if ((regexp != NULL) && (numStrings >= 2)) {
|
||||
xmlRegexpExec(regexp, BAD_CAST str[1]);
|
||||
}
|
||||
if ((regexp != NULL) && (numStrings >= 2)) {
|
||||
xmlRegexpExec(regexp, BAD_CAST str[1]);
|
||||
}
|
||||
#endif
|
||||
xmlRegFreeRegexp(regexp);
|
||||
xmlRegFreeRegexp(regexp);
|
||||
}
|
||||
|
||||
xmlFree(str[0]);
|
||||
xmlFree(str[1]);
|
||||
|
139
fuzz/regexp.dict
139
fuzz/regexp.dict
@ -3,9 +3,19 @@ quant_opt="?"
|
||||
quant_some="+"
|
||||
quant_num="{1,2}"
|
||||
|
||||
dot="."
|
||||
branch="|a"
|
||||
parens="()"
|
||||
parens_inner=")("
|
||||
pos_group="[a]"
|
||||
neg_group="[^a]"
|
||||
class_subtraction="[a-[b]]"
|
||||
|
||||
esc_space="\\s"
|
||||
esc_initial="\\i"
|
||||
esc_name="\\c"
|
||||
esc_digit="\\d"
|
||||
esc_word="\\w"
|
||||
|
||||
cat_letter="\\p{L}"
|
||||
cat_mark="\\p{M}"
|
||||
@ -14,3 +24,132 @@ cat_punct="\\p{P}"
|
||||
cat_sym="\\p{S}"
|
||||
cat_sep="\\p{Z}"
|
||||
cat_other="\\p{C}"
|
||||
|
||||
block_aegean_numbers="\\p{IsAegeanNumbers}"
|
||||
block_alphabetic_presentation_forms="\\p{IsAlphabeticPresentationForms}"
|
||||
block_arabic="\\p{IsArabic}"
|
||||
block_arabic_presentation_forms_a="\\p{IsArabicPresentationFormsA}"
|
||||
block_arabic_presentation_forms_b="\\p{IsArabicPresentationFormsB}"
|
||||
block_armenian="\\p{IsArmenian}"
|
||||
block_arrows="\\p{IsArrows}"
|
||||
block_basic_latin="\\p{IsBasicLatin}"
|
||||
block_bengali="\\p{IsBengali}"
|
||||
block_block_elements="\\p{IsBlockElements}"
|
||||
block_bopomofo="\\p{IsBopomofo}"
|
||||
block_bopomofo_extended="\\p{IsBopomofoExtended}"
|
||||
block_box_drawing="\\p{IsBoxDrawing}"
|
||||
block_braille_patterns="\\p{IsBraillePatterns}"
|
||||
block_buhid="\\p{IsBuhid}"
|
||||
block_byzantine_musical_symbols="\\p{IsByzantineMusicalSymbols}"
|
||||
block_c_j_k_compatibility="\\p{IsCJKCompatibility}"
|
||||
block_c_j_k_compatibility_forms="\\p{IsCJKCompatibilityForms}"
|
||||
block_c_j_k_compatibility_ideographs="\\p{IsCJKCompatibilityIdeographs}"
|
||||
block_c_j_k_compatibility_ideographs_supplement="\\p{IsCJKCompatibilityIdeographsSupplement}"
|
||||
block_c_j_k_radicals_supplement="\\p{IsCJKRadicalsSupplement}"
|
||||
block_c_j_k_symbolsand_punctuation="\\p{IsCJKSymbolsandPunctuation}"
|
||||
block_c_j_k_unified_ideographs="\\p{IsCJKUnifiedIdeographs}"
|
||||
block_c_j_k_unified_ideographs_extension_a="\\p{IsCJKUnifiedIdeographsExtensionA}"
|
||||
block_cjk_unified_ideographs_extension_b="\\p{IsCJKUnifiedIdeographsExtensionB}"
|
||||
block_cherokee="\\p{IsCherokee}"
|
||||
block_combining_diacritical_marks="\\p{IsCombiningDiacriticalMarks}"
|
||||
block_combining_diacritical_marksfor_symbols="\\p{IsCombiningDiacriticalMarksforSymbols}"
|
||||
block_combining_half_marks="\\p{IsCombiningHalfMarks}"
|
||||
block_combining_marksfor_symbols="\\p{IsCombiningMarksforSymbols}"
|
||||
block_control_pictures="\\p{IsControlPictures}"
|
||||
block_currency_symbols="\\p{IsCurrencySymbols}"
|
||||
block_cypriot_syllabary="\\p{IsCypriotSyllabary}"
|
||||
block_cyrillic="\\p{IsCyrillic}"
|
||||
block_cyrillic_supplement="\\p{IsCyrillicSupplement}"
|
||||
block_deseret="\\p{IsDeseret}"
|
||||
block_devanagari="\\p{IsDevanagari}"
|
||||
block_dingbats="\\p{IsDingbats}"
|
||||
block_enclosed_alphanumerics="\\p{IsEnclosedAlphanumerics}"
|
||||
block_enclosed_cjk_lettersand_months="\\p{IsEnclosedCJKLettersandMonths}"
|
||||
block_ethiopic="\\p{IsEthiopic}"
|
||||
block_general_punctuation="\\p{IsGeneralPunctuation}"
|
||||
block_geometric_shapes="\\p{IsGeometricShapes}"
|
||||
block_georgian="\\p{IsGeorgian}"
|
||||
block_gothic="\\p{IsGothic}"
|
||||
block_greek="\\p{IsGreek}"
|
||||
block_greek_extended="\\p{IsGreekExtended}"
|
||||
block_greekand_coptic="\\p{IsGreekandCoptic}"
|
||||
block_gujarati="\\p{IsGujarati}"
|
||||
block_gurmukhi="\\p{IsGurmukhi}"
|
||||
block_halfwidthand_fullwidth_forms="\\p{IsHalfwidthandFullwidthForms}"
|
||||
block_hangul_compatibility_jamo="\\p{IsHangulCompatibilityJamo}"
|
||||
block_hangul_jamo="\\p{IsHangulJamo}"
|
||||
block_hangul_syllables="\\p{IsHangulSyllables}"
|
||||
block_hanunoo="\\p{IsHanunoo}"
|
||||
block_hebrew="\\p{IsHebrew}"
|
||||
block_high_private_use_surrogates="\\p{IsHighPrivateUseSurrogates}"
|
||||
block_high_surrogates="\\p{IsHighSurrogates}"
|
||||
block_hiragana="\\p{IsHiragana}"
|
||||
block_ipa_extensions="\\p{IsIPAExtensions}"
|
||||
block_ideographic_description_characters="\\p{IsIdeographicDescriptionCharacters}"
|
||||
block_kanbun="\\p{IsKanbun}"
|
||||
block_kangxi_radicals="\\p{IsKangxiRadicals}"
|
||||
block_kannada="\\p{IsKannada}"
|
||||
block_katakana="\\p{IsKatakana}"
|
||||
block_katakana_phonetic_extensions="\\p{IsKatakanaPhoneticExtensions}"
|
||||
block_khmer="\\p{IsKhmer}"
|
||||
block_khmer_symbols="\\p{IsKhmerSymbols}"
|
||||
block_lao="\\p{IsLao}"
|
||||
block_latin1Supplement="\\p{IsLatin1Supplement}"
|
||||
block_latin_extended_a="\\p{IsLatinExtendedA}"
|
||||
block_latin_extended_b="\\p{IsLatinExtendedB}"
|
||||
block_latin_extended_additional="\\p{IsLatinExtendedAdditional}"
|
||||
block_letterlike_symbols="\\p{IsLetterlikeSymbols}"
|
||||
block_limbu="\\p{IsLimbu}"
|
||||
block_linear_b_ideograms="\\p{IsLinearBIdeograms}"
|
||||
block_linear_b_syllabary="\\p{IsLinearBSyllabary}"
|
||||
block_low_surrogates="\\p{IsLowSurrogates}"
|
||||
block_malayalam="\\p{IsMalayalam}"
|
||||
block_mathematical_alphanumeric_symbols="\\p{IsMathematicalAlphanumericSymbols}"
|
||||
block_mathematical_operators="\\p{IsMathematicalOperators}"
|
||||
block_miscellaneous_mathematical_symbols_a="\\p{IsMiscellaneousMathematicalSymbolsA}"
|
||||
block_miscellaneous_mathematical_symbols_b="\\p{IsMiscellaneousMathematicalSymbolsB}"
|
||||
block_miscellaneous_symbols="\\p{IsMiscellaneousSymbols}"
|
||||
block_miscellaneous_symbolsand_arrows="\\p{IsMiscellaneousSymbolsandArrows}"
|
||||
block_miscellaneous_technical="\\p{IsMiscellaneousTechnical}"
|
||||
block_mongolian="\\p{IsMongolian}"
|
||||
block_musical_symbols="\\p{IsMusicalSymbols}"
|
||||
block_myanmar="\\p{IsMyanmar}"
|
||||
block_number_forms="\\p{IsNumberForms}"
|
||||
block_ogham="\\p{IsOgham}"
|
||||
block_old_italic="\\p{IsOldItalic}"
|
||||
block_optical_character_recognition="\\p{IsOpticalCharacterRecognition}"
|
||||
block_oriya="\\p{IsOriya}"
|
||||
block_osmanya="\\p{IsOsmanya}"
|
||||
block_phonetic_extensions="\\p{IsPhoneticExtensions}"
|
||||
block_private_use="\\p{IsPrivateUse}"
|
||||
block_private_use_area="\\p{IsPrivateUseArea}"
|
||||
block_runic="\\p{IsRunic}"
|
||||
block_shavian="\\p{IsShavian}"
|
||||
block_sinhala="\\p{IsSinhala}"
|
||||
block_small_form_variants="\\p{IsSmallFormVariants}"
|
||||
block_spacing_modifier_letters="\\p{IsSpacingModifierLetters}"
|
||||
block_specials="\\p{IsSpecials}"
|
||||
block_superscriptsand_subscripts="\\p{IsSuperscriptsandSubscripts}"
|
||||
block_supplemental_arrows_a="\\p{IsSupplementalArrowsA}"
|
||||
block_supplemental_arrows_b="\\p{IsSupplementalArrowsB}"
|
||||
block_supplemental_mathematical_operators="\\p{IsSupplementalMathematicalOperators}"
|
||||
block_supplementary_private_use_area_a="\\p{IsSupplementaryPrivateUseAreaA}"
|
||||
block_supplementary_private_use_area_b="\\p{IsSupplementaryPrivateUseAreaB}"
|
||||
block_syriac="\\p{IsSyriac}"
|
||||
block_tagalog="\\p{IsTagalog}"
|
||||
block_tagbanwa="\\p{IsTagbanwa}"
|
||||
block_tags="\\p{IsTags}"
|
||||
block_tai_le="\\p{IsTaiLe}"
|
||||
block_tai_xuan_jing_symbols="\\p{IsTaiXuanJingSymbols}"
|
||||
block_tamil="\\p{IsTamil}"
|
||||
block_telugu="\\p{IsTelugu}"
|
||||
block_thaana="\\p{IsThaana}"
|
||||
block_thai="\\p{IsThai}"
|
||||
block_tibetan="\\p{IsTibetan}"
|
||||
block_ugaritic="\\p{IsUgaritic}"
|
||||
block_unified_canadian_aboriginal_syllabics="\\p{IsUnifiedCanadianAboriginalSyllabics}"
|
||||
block_variation_selectors="\\p{IsVariationSelectors}"
|
||||
block_variation_selectors_supplement="\\p{IsVariationSelectorsSupplement}"
|
||||
block_yi_radicals="\\p{IsYiRadicals}"
|
||||
block_yi_syllables="\\p{IsYiSyllables}"
|
||||
block_yijing_hexagram_symbols="\\p{IsYijingHexagramSymbols}"
|
||||
|
3
fuzz/regexp.options
Normal file
3
fuzz/regexp.options
Normal file
@ -0,0 +1,3 @@
|
||||
[libfuzzer]
|
||||
max_len = 200
|
||||
timeout = 5
|
@ -1,2 +1,3 @@
|
||||
[libfuzzer]
|
||||
max_len = 80000
|
||||
timeout = 20
|
||||
|
3
fuzz/uri.options
Normal file
3
fuzz/uri.options
Normal file
@ -0,0 +1,3 @@
|
||||
[libfuzzer]
|
||||
max_len = 10000
|
||||
timeout = 5
|
@ -1,2 +1,3 @@
|
||||
[libfuzzer]
|
||||
max_len = 80000
|
||||
timeout = 20
|
||||
|
Loading…
Reference in New Issue
Block a user