2020-06-15 12:20:54 +02:00
/*
 * html.c: a libFuzzer target to test several HTML parser interfaces.
 *
 * See Copyright for the status of this software.
 */
# include <libxml/HTMLparser.h>
# include <libxml/HTMLtree.h>
2021-02-22 17:58:06 +01:00
# include <libxml/catalog.h>
2020-06-15 12:20:54 +02:00
# include "fuzz.h"
/*
 * LLVMFuzzerInitialize:
 * @argc:  unused
 * @argv:  unused
 *
 * One-time setup hook for the fuzz target. Installs the fuzzing memory
 * hooks via xmlFuzzMemSetup() before the library itself is initialised
 * with xmlInitParser(). When catalog support is compiled in, the catalog
 * is initialised and its defaults are set to XML_CATA_ALLOW_NONE.
 *
 * Returns 0 in all cases.
 */
int
LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
                     char ***argv ATTRIBUTE_UNUSED) {
    /* Memory hooks go in first, ahead of any parser initialisation. */
    xmlFuzzMemSetup();
    xmlInitParser();

#ifdef LIBXML_CATALOG_ENABLED
    /*
     * NOTE(review): presumably this keeps the parser from consulting
     * system catalogs while fuzzing -- confirm against the catalog docs.
     */
    xmlInitializeCatalog();
    xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
#endif

    return 0;
}
int
LLVMFuzzerTestOneInput ( const char * data , size_t size ) {
2023-12-10 19:07:32 +01:00
xmlParserCtxtPtr ctxt ;
2020-06-15 12:20:54 +02:00
htmlDocPtr doc ;
const char * docBuffer ;
2023-09-21 13:05:49 +02:00
size_t maxAlloc , docSize ;
2022-09-02 17:47:48 +02:00
int opts ;
2020-06-15 12:20:54 +02:00
xmlFuzzDataInit ( data , size ) ;
2023-03-08 13:59:00 +01:00
opts = ( int ) xmlFuzzReadInt ( 4 ) ;
2023-12-10 19:07:32 +01:00
maxAlloc = xmlFuzzReadInt ( 4 ) % ( size + 100 ) ;
2020-06-15 12:20:54 +02:00
docBuffer = xmlFuzzReadRemaining ( & docSize ) ;
if ( docBuffer = = NULL ) {
xmlFuzzDataCleanup ( ) ;
return ( 0 ) ;
}
/* Pull parser */
2023-03-08 13:59:03 +01:00
xmlFuzzMemSetLimit ( maxAlloc ) ;
2023-12-10 19:07:32 +01:00
ctxt = htmlNewParserCtxt ( ) ;
if ( ctxt ! = NULL ) {
2024-06-11 16:58:09 +02:00
xmlCtxtSetErrorHandler ( ctxt , xmlFuzzSErrorFunc , NULL ) ;
2023-12-10 19:07:32 +01:00
doc = htmlCtxtReadMemory ( ctxt , docBuffer , docSize , NULL , NULL , opts ) ;
xmlFuzzCheckMallocFailure ( " htmlCtxtReadMemory " ,
ctxt - > errNo = = XML_ERR_NO_MEMORY ) ;
if ( doc ! = NULL ) {
xmlDocPtr copy ;
2021-02-07 13:57:49 +01:00
2023-09-21 13:05:49 +02:00
# ifdef LIBXML_OUTPUT_ENABLED
2023-12-10 19:07:32 +01:00
xmlOutputBufferPtr out ;
const xmlChar * content ;
/*
* Also test the serializer . Call htmlDocContentDumpOutput with our
* own buffer to avoid encoding the output . The HTML encoding is
* excruciatingly slow ( see htmlEntityValueLookup ) .
*/
out = xmlAllocOutputBuffer ( NULL ) ;
htmlDocContentDumpOutput ( out , doc , NULL ) ;
content = xmlOutputBufferGetContent ( out ) ;
2024-02-29 14:51:49 +01:00
xmlOutputBufferClose ( out ) ;
2023-12-10 19:07:32 +01:00
xmlFuzzCheckMallocFailure ( " htmlDocContentDumpOutput " ,
content = = NULL ) ;
2023-09-21 13:05:49 +02:00
# endif
2021-02-07 13:57:49 +01:00
2023-12-10 19:07:32 +01:00
copy = xmlCopyDoc ( doc , 1 ) ;
xmlFuzzCheckMallocFailure ( " xmlCopyNode " , copy = = NULL ) ;
xmlFreeDoc ( copy ) ;
xmlFreeDoc ( doc ) ;
}
htmlFreeParserCtxt ( ctxt ) ;
}
2020-06-15 12:20:54 +02:00
/* Push parser */
2023-09-21 01:29:40 +02:00
# ifdef LIBXML_PUSH_ENABLED
2023-09-21 13:05:49 +02:00
{
static const size_t maxChunkSize = 128 ;
size_t consumed , chunkSize ;
xmlFuzzMemSetLimit ( maxAlloc ) ;
ctxt = htmlCreatePushParserCtxt ( NULL , NULL , NULL , 0 , NULL ,
XML_CHAR_ENCODING_NONE ) ;
if ( ctxt ! = NULL ) {
2024-06-11 16:58:09 +02:00
xmlCtxtSetErrorHandler ( ctxt , xmlFuzzSErrorFunc , NULL ) ;
2023-09-21 13:05:49 +02:00
htmlCtxtUseOptions ( ctxt , opts ) ;
for ( consumed = 0 ; consumed < docSize ; consumed + = chunkSize ) {
chunkSize = docSize - consumed ;
if ( chunkSize > maxChunkSize )
chunkSize = maxChunkSize ;
htmlParseChunk ( ctxt , docBuffer + consumed , chunkSize , 0 ) ;
}
htmlParseChunk ( ctxt , NULL , 0 , 1 ) ;
2023-12-10 19:07:32 +01:00
xmlFuzzCheckMallocFailure ( " htmlParseChunk " ,
ctxt - > errNo = = XML_ERR_NO_MEMORY ) ;
2023-09-21 13:05:49 +02:00
xmlFreeDoc ( ctxt - > myDoc ) ;
htmlFreeParserCtxt ( ctxt ) ;
2023-03-08 13:59:03 +01:00
}
}
2023-09-21 01:29:40 +02:00
# endif
2020-06-15 12:20:54 +02:00
/* Cleanup */
2023-03-08 13:59:03 +01:00
xmlFuzzMemSetLimit ( 0 ) ;
2020-06-15 12:20:54 +02:00
xmlFuzzDataCleanup ( ) ;
2021-02-22 21:28:21 +01:00
xmlResetLastError ( ) ;
2020-06-15 12:20:54 +02:00
return ( 0 ) ;
}