mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-03-23 02:50:08 +03:00
Speed up HTML fuzzer
htmlDocDumpMemory uses the "HTML" encoding if no other encoding was specified in the source HTML. This encoding can be extremely slow because of an inefficiency in htmlEntityValueLookup. Stop encoding the output for now.
This commit is contained in:
parent
e6495e4789
commit
ec808a4415
15
fuzz/html.c
15
fuzz/html.c
@@ -22,7 +22,7 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
|
||||
static const size_t maxChunkSize = 128;
|
||||
htmlDocPtr doc;
|
||||
htmlParserCtxtPtr ctxt;
|
||||
xmlChar *out;
|
||||
xmlOutputBufferPtr out;
|
||||
const char *docBuffer;
|
||||
size_t docSize, consumed, chunkSize;
|
||||
int opts, outSize;
|
||||
@@ -39,9 +39,16 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
|
||||
/* Pull parser */
|
||||
|
||||
doc = htmlReadMemory(docBuffer, docSize, NULL, NULL, opts);
|
||||
/* Also test the serializer. */
|
||||
htmlDocDumpMemory(doc, &out, &outSize);
|
||||
xmlFree(out);
|
||||
|
||||
/*
|
||||
* Also test the serializer. Call htmlDocContentDumpOutput with our
|
||||
* own buffer to avoid encoding the output. The HTML encoding is
|
||||
* excruciatingly slow (see htmlEntityValueLookup).
|
||||
*/
|
||||
out = xmlAllocOutputBuffer(NULL);
|
||||
htmlDocContentDumpOutput(out, doc, NULL);
|
||||
xmlOutputBufferClose(out);
|
||||
|
||||
xmlFreeDoc(doc);
|
||||
|
||||
/* Push parser */
|
||||
|
Loading…
x
Reference in New Issue
Block a user