2020-08-24 17:28:54 +03:00
/*
* xmlSeed . c : Generate the XML seed corpus for fuzzing .
*
* See Copyright for the status of this software .
*/
# include <stdio.h>
# include <string.h>
# include <glob.h>
# include <libgen.h>
# include <sys/stat.h>
# ifdef _WIN32
# include <direct.h>
# else
# include <unistd.h>
# endif
# include <libxml/parser.h>
# include <libxml/parserInternals.h>
# include <libxml/HTMLparser.h>
# include <libxml/xinclude.h>
# include <libxml/xmlschemas.h>
# include "fuzz.h"
# define PATH_SIZE 500
# define SEED_BUF_SIZE 16384
# define EXPR_SIZE 4500
typedef int
( * fileFunc ) ( const char * base , FILE * out ) ;
typedef int
( * mainFunc ) ( const char * arg ) ;
static struct {
FILE * out ;
xmlHashTablePtr entities ; /* Maps URLs to xmlFuzzEntityInfos */
xmlExternalEntityLoader oldLoader ;
fileFunc processFile ;
const char * fuzzer ;
int counter ;
char cwd [ PATH_SIZE ] ;
} globalData ;
2022-09-02 18:47:48 +03:00
# if defined(HAVE_SCHEMA_FUZZER) || \
defined ( HAVE_XML_FUZZER )
2020-08-24 17:28:54 +03:00
/*
* A custom entity loader that writes all external DTDs or entities to a
* single file in the format expected by xmlFuzzEntityLoader .
*/
static xmlParserInputPtr
fuzzEntityRecorder ( const char * URL , const char * ID ,
xmlParserCtxtPtr ctxt ) {
xmlParserInputPtr in ;
static const int chunkSize = 16384 ;
int len ;
in = xmlNoNetExternalEntityLoader ( URL , ID , ctxt ) ;
if ( in = = NULL )
return ( NULL ) ;
if ( globalData . entities = = NULL ) {
globalData . entities = xmlHashCreate ( 4 ) ;
} else if ( xmlHashLookup ( globalData . entities ,
( const xmlChar * ) URL ) ! = NULL ) {
return ( in ) ;
}
do {
len = xmlParserInputBufferGrow ( in - > buf , chunkSize ) ;
if ( len < 0 ) {
fprintf ( stderr , " Error reading %s \n " , URL ) ;
xmlFreeInputStream ( in ) ;
return ( NULL ) ;
}
} while ( len > 0 ) ;
xmlFuzzWriteString ( globalData . out , URL ) ;
xmlFuzzWriteString ( globalData . out ,
( char * ) xmlBufContent ( in - > buf - > buffer ) ) ;
xmlFreeInputStream ( in ) ;
xmlHashAddEntry ( globalData . entities , ( const xmlChar * ) URL , NULL ) ;
return ( xmlNoNetExternalEntityLoader ( URL , ID , ctxt ) ) ;
}
static void
fuzzRecorderInit ( FILE * out ) {
globalData . out = out ;
globalData . entities = xmlHashCreate ( 8 ) ;
globalData . oldLoader = xmlGetExternalEntityLoader ( ) ;
xmlSetExternalEntityLoader ( fuzzEntityRecorder ) ;
}
static void
2022-09-02 18:47:48 +03:00
fuzzRecorderCleanup ( void ) {
2020-08-24 17:28:54 +03:00
xmlSetExternalEntityLoader ( globalData . oldLoader ) ;
xmlHashFree ( globalData . entities , xmlHashDefaultDeallocator ) ;
globalData . out = NULL ;
globalData . entities = NULL ;
globalData . oldLoader = NULL ;
}
2022-09-02 18:47:48 +03:00
# endif
2020-08-24 17:28:54 +03:00
2021-02-22 23:26:13 +03:00
# ifdef HAVE_XML_FUZZER
2020-08-24 17:28:54 +03:00
static int
processXml ( const char * docFile , FILE * out ) {
int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD ;
xmlDocPtr doc ;
fwrite ( & opts , sizeof ( opts ) , 1 , out ) ;
fuzzRecorderInit ( out ) ;
doc = xmlReadFile ( docFile , NULL , opts ) ;
xmlXIncludeProcessFlags ( doc , opts ) ;
xmlFreeDoc ( doc ) ;
fuzzRecorderCleanup ( ) ;
return ( 0 ) ;
}
2021-02-22 23:26:13 +03:00
# endif
2020-08-24 17:28:54 +03:00
2021-02-22 23:26:13 +03:00
# ifdef HAVE_HTML_FUZZER
2020-08-24 17:28:54 +03:00
static int
processHtml ( const char * docFile , FILE * out ) {
char buf [ SEED_BUF_SIZE ] ;
FILE * file ;
size_t size ;
int opts = 0 ;
fwrite ( & opts , sizeof ( opts ) , 1 , out ) ;
/* Copy file */
file = fopen ( docFile , " rb " ) ;
if ( file = = NULL ) {
fprintf ( stderr , " couldn't open %s \n " , docFile ) ;
return ( 0 ) ;
}
do {
size = fread ( buf , 1 , SEED_BUF_SIZE , file ) ;
if ( size > 0 )
fwrite ( buf , 1 , size , out ) ;
} while ( size = = SEED_BUF_SIZE ) ;
fclose ( file ) ;
return ( 0 ) ;
}
2021-02-22 23:26:13 +03:00
# endif
2020-08-24 17:28:54 +03:00
2021-02-22 23:26:13 +03:00
# ifdef HAVE_SCHEMA_FUZZER
2020-08-24 17:28:54 +03:00
static int
processSchema ( const char * docFile , FILE * out ) {
xmlSchemaPtr schema ;
xmlSchemaParserCtxtPtr pctxt ;
fuzzRecorderInit ( out ) ;
pctxt = xmlSchemaNewParserCtxt ( docFile ) ;
xmlSchemaSetParserErrors ( pctxt , xmlFuzzErrorFunc , xmlFuzzErrorFunc , NULL ) ;
schema = xmlSchemaParse ( pctxt ) ;
xmlSchemaFreeParserCtxt ( pctxt ) ;
xmlSchemaFree ( schema ) ;
fuzzRecorderCleanup ( ) ;
return ( 0 ) ;
}
2021-02-22 23:26:13 +03:00
# endif
2020-08-24 17:28:54 +03:00
2022-09-02 18:47:48 +03:00
# if defined(HAVE_HTML_FUZZER) || \
defined ( HAVE_SCHEMA_FUZZER ) | | \
defined ( HAVE_XML_FUZZER )
2020-08-24 17:28:54 +03:00
static int
processPattern ( const char * pattern ) {
glob_t globbuf ;
int ret = 0 ;
2022-09-02 18:47:48 +03:00
int res ;
size_t i ;
2020-08-24 17:28:54 +03:00
res = glob ( pattern , 0 , NULL , & globbuf ) ;
if ( res = = GLOB_NOMATCH )
return ( 0 ) ;
if ( res ! = 0 ) {
fprintf ( stderr , " couldn't match pattern %s \n " , pattern ) ;
return ( - 1 ) ;
}
for ( i = 0 ; i < globbuf . gl_pathc ; i + + ) {
struct stat statbuf ;
char outPath [ PATH_SIZE ] ;
char * dirBuf = NULL ;
char * baseBuf = NULL ;
const char * path , * dir , * base ;
FILE * out = NULL ;
int dirChanged = 0 ;
size_t size ;
path = globbuf . gl_pathv [ i ] ;
if ( ( stat ( path , & statbuf ) ! = 0 ) | | ( ! S_ISREG ( statbuf . st_mode ) ) )
continue ;
dirBuf = ( char * ) xmlCharStrdup ( path ) ;
baseBuf = ( char * ) xmlCharStrdup ( path ) ;
if ( ( dirBuf = = NULL ) | | ( baseBuf = = NULL ) ) {
fprintf ( stderr , " memory allocation failed \n " ) ;
ret = - 1 ;
goto error ;
}
dir = dirname ( dirBuf ) ;
base = basename ( baseBuf ) ;
size = snprintf ( outPath , sizeof ( outPath ) , " seed/%s/%s " ,
globalData . fuzzer , base ) ;
if ( size > = PATH_SIZE ) {
fprintf ( stderr , " creating path failed \n " ) ;
ret = - 1 ;
goto error ;
}
out = fopen ( outPath , " wb " ) ;
if ( out = = NULL ) {
fprintf ( stderr , " couldn't open %s for writing \n " , outPath ) ;
ret = - 1 ;
goto error ;
}
if ( chdir ( dir ) ! = 0 ) {
fprintf ( stderr , " couldn't chdir to %s \n " , dir ) ;
ret = - 1 ;
goto error ;
}
dirChanged = 1 ;
if ( globalData . processFile ( base , out ) ! = 0 )
ret = - 1 ;
error :
if ( out ! = NULL )
fclose ( out ) ;
xmlFree ( dirBuf ) ;
xmlFree ( baseBuf ) ;
if ( ( dirChanged ) & & ( chdir ( globalData . cwd ) ! = 0 ) ) {
fprintf ( stderr , " couldn't chdir to %s \n " , globalData . cwd ) ;
ret = - 1 ;
break ;
}
}
globfree ( & globbuf ) ;
return ( ret ) ;
}
2022-09-02 18:47:48 +03:00
# endif
2020-08-24 17:28:54 +03:00
2021-02-22 23:26:13 +03:00
# ifdef HAVE_XPATH_FUZZER
2020-08-24 17:28:54 +03:00
static int
processXPath ( const char * testDir , const char * prefix , const char * name ,
const char * data , const char * subdir , int xptr ) {
char pattern [ PATH_SIZE ] ;
glob_t globbuf ;
size_t i , size ;
int ret = 0 , res ;
size = snprintf ( pattern , sizeof ( pattern ) , " %s/%s/%s* " ,
testDir , subdir , prefix ) ;
if ( size > = PATH_SIZE )
return ( - 1 ) ;
res = glob ( pattern , 0 , NULL , & globbuf ) ;
if ( res = = GLOB_NOMATCH )
return ( 0 ) ;
if ( res ! = 0 ) {
fprintf ( stderr , " couldn't match pattern %s \n " , pattern ) ;
return ( - 1 ) ;
}
for ( i = 0 ; i < globbuf . gl_pathc ; i + + ) {
char * path = globbuf . gl_pathv [ i ] ;
struct stat statbuf ;
FILE * in ;
char expr [ EXPR_SIZE ] ;
if ( ( stat ( path , & statbuf ) ! = 0 ) | | ( ! S_ISREG ( statbuf . st_mode ) ) )
continue ;
in = fopen ( path , " rb " ) ;
if ( in = = NULL ) {
ret = - 1 ;
continue ;
}
2022-09-02 18:47:48 +03:00
while ( fgets ( expr , EXPR_SIZE , in ) ! = NULL ) {
2020-08-24 17:28:54 +03:00
char outPath [ PATH_SIZE ] ;
FILE * out ;
int j ;
for ( j = 0 ; expr [ j ] ! = 0 ; j + + )
if ( expr [ j ] = = ' \r ' | | expr [ j ] = = ' \n ' )
break ;
expr [ j ] = 0 ;
size = snprintf ( outPath , sizeof ( outPath ) , " seed/xpath/%s-%d " ,
name , globalData . counter ) ;
if ( size > = PATH_SIZE ) {
ret = - 1 ;
continue ;
}
out = fopen ( outPath , " wb " ) ;
if ( out = = NULL ) {
ret = - 1 ;
continue ;
}
if ( xptr ) {
xmlFuzzWriteString ( out , expr ) ;
} else {
char xptrExpr [ EXPR_SIZE + 100 ] ;
/* Wrap XPath expressions as XPointer */
snprintf ( xptrExpr , sizeof ( xptrExpr ) , " xpointer(%s) " , expr ) ;
xmlFuzzWriteString ( out , xptrExpr ) ;
}
xmlFuzzWriteString ( out , data ) ;
fclose ( out ) ;
globalData . counter + + ;
}
fclose ( in ) ;
}
globfree ( & globbuf ) ;
return ( ret ) ;
}
2022-09-02 18:47:48 +03:00
static int
2020-08-24 17:28:54 +03:00
processXPathDir ( const char * testDir ) {
char pattern [ PATH_SIZE ] ;
glob_t globbuf ;
size_t i , size ;
int ret = 0 ;
globalData . counter = 1 ;
if ( processXPath ( testDir , " " , " expr " , " <d></d> " , " expr " , 0 ) ! = 0 )
ret = - 1 ;
size = snprintf ( pattern , sizeof ( pattern ) , " %s/docs/* " , testDir ) ;
if ( size > = PATH_SIZE )
return ( 1 ) ;
if ( glob ( pattern , 0 , NULL , & globbuf ) ! = 0 )
return ( 1 ) ;
for ( i = 0 ; i < globbuf . gl_pathc ; i + + ) {
char * path = globbuf . gl_pathv [ i ] ;
char * data ;
const char * docFile ;
data = xmlSlurpFile ( path , NULL ) ;
if ( data = = NULL ) {
ret = - 1 ;
continue ;
}
docFile = basename ( path ) ;
globalData . counter = 1 ;
if ( processXPath ( testDir , docFile , docFile , data , " tests " , 0 ) ! = 0 )
ret = - 1 ;
if ( processXPath ( testDir , docFile , docFile , data , " xptr " , 1 ) ! = 0 )
ret = - 1 ;
2022-04-21 04:52:52 +03:00
if ( processXPath ( testDir , docFile , docFile , data , " xptr-xp1 " , 1 ) ! = 0 )
ret = - 1 ;
2020-08-24 17:28:54 +03:00
xmlFree ( data ) ;
}
globfree ( & globbuf ) ;
return ( ret ) ;
}
2021-02-22 23:26:13 +03:00
# endif
2020-08-24 17:28:54 +03:00
int
main ( int argc , const char * * argv ) {
2021-02-22 23:26:13 +03:00
mainFunc processArg = NULL ;
2020-08-24 17:28:54 +03:00
const char * fuzzer ;
int ret = 0 ;
int i ;
if ( argc < 3 ) {
fprintf ( stderr , " usage: seed [FUZZER] [PATTERN...] \n " ) ;
return ( 1 ) ;
}
xmlSetGenericErrorFunc ( NULL , xmlFuzzErrorFunc ) ;
fuzzer = argv [ 1 ] ;
if ( strcmp ( fuzzer , " html " ) = = 0 ) {
2021-02-22 23:26:13 +03:00
# ifdef HAVE_HTML_FUZZER
processArg = processPattern ;
2020-08-24 17:28:54 +03:00
globalData . processFile = processHtml ;
2021-02-22 23:26:13 +03:00
# endif
2020-08-24 17:28:54 +03:00
} else if ( strcmp ( fuzzer , " schema " ) = = 0 ) {
2021-02-22 23:26:13 +03:00
# ifdef HAVE_SCHEMA_FUZZER
processArg = processPattern ;
2020-08-24 17:28:54 +03:00
globalData . processFile = processSchema ;
2021-02-22 23:26:13 +03:00
# endif
2020-08-24 17:28:54 +03:00
} else if ( strcmp ( fuzzer , " xml " ) = = 0 ) {
2021-02-22 23:26:13 +03:00
# ifdef HAVE_XML_FUZZER
processArg = processPattern ;
2020-08-24 17:28:54 +03:00
globalData . processFile = processXml ;
2021-02-22 23:26:13 +03:00
# endif
2020-08-24 17:28:54 +03:00
} else if ( strcmp ( fuzzer , " xpath " ) = = 0 ) {
2021-02-22 23:26:13 +03:00
# ifdef HAVE_XPATH_FUZZER
2020-08-24 17:28:54 +03:00
processArg = processXPathDir ;
2021-02-22 23:26:13 +03:00
# endif
2020-08-24 17:28:54 +03:00
} else {
fprintf ( stderr , " unknown fuzzer %s \n " , fuzzer ) ;
return ( 1 ) ;
}
globalData . fuzzer = fuzzer ;
if ( getcwd ( globalData . cwd , PATH_SIZE ) = = NULL ) {
fprintf ( stderr , " couldn't get current directory \n " ) ;
return ( 1 ) ;
}
2021-02-22 23:26:13 +03:00
if ( processArg ! = NULL )
for ( i = 2 ; i < argc ; i + + )
processArg ( argv [ i ] ) ;
2020-08-24 17:28:54 +03:00
return ( ret ) ;
}