mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2024-10-26 12:25:09 +03:00
fuzz: Add a few more comments
This commit is contained in:
parent
5bb84b47b8
commit
1f18d37798
65
fuzz/api.c
65
fuzz/api.c
@ -1,22 +1,35 @@
|
||||
/*
|
||||
* api.c: a libFuzzer target to test all kinds of API functions.
|
||||
* api.c: a libFuzzer target to test node-related API functions.
|
||||
*
|
||||
* See Copyright for the status of this software.
|
||||
*
|
||||
* This is a simple virtual machine which runs fuzz data as a program.
|
||||
* An important design goal is to execute as many API calls as possible
|
||||
* per input byte.
|
||||
*
|
||||
* There is a fixed number of registers for basic types like integers
|
||||
* or strings as well as libxml2 objects like xmlNode. An opcode
|
||||
* typically results in a call to an API function using the freshest
|
||||
* registers for each argument type and storing the result in the
|
||||
* stalest register. This can be implemented using a ring buffer.
|
||||
* We use a fixed number of registers for basic types like integers
|
||||
* or strings as well as libxml2 objects like xmlNode. The opcodes are
|
||||
* single bytes which typically result in a call to an API function
|
||||
* using the freshest registers for each argument type and storing the
|
||||
* result in the stalest register. This can be implemented using a ring
|
||||
* buffer.
|
||||
*
|
||||
* There are a few other opcodes to initialize or duplicate registers,
|
||||
* so all kinds of API calls can potentially be generated from
|
||||
* fuzz data.
|
||||
* so all kinds of API calls can potentially be generated from fuzz
|
||||
* data.
|
||||
*
|
||||
* TODO:
|
||||
* - Create documents with a dictionary.
|
||||
* This architecture is similar to a stack machine and benefits from
|
||||
* great code density. The main difference is that values aren't
|
||||
* destroyed when popping arguments from the stack and that the bottom
|
||||
* of the stack is eventually overwritten if the ring buffer overflows.
|
||||
*
|
||||
* The main complication is memory management of nodes. Whenever a
|
||||
* reference between two nodes is removed, whether by an API call or
|
||||
* the VM clearing a register, we must check whether this leaves
|
||||
* unreferenced nodes which can then be freed. There are no opcodes
|
||||
* to free a node explicitly. The FIFO patterns generated by
|
||||
* overflowing the ring buffer and freeing the registers at the end of
|
||||
* a program seem to do a good enough job.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
@ -672,7 +685,7 @@ dropNode(xmlNodePtr node) {
|
||||
/*
|
||||
* removeNode and removeChildren remove all references to a node
|
||||
* or its children from the registers. These functions should be
|
||||
* called in an API function destroys nodes, for example by merging
|
||||
* called if an API function destroys nodes, for example by merging
|
||||
* text nodes.
|
||||
*/
|
||||
|
||||
@ -971,10 +984,25 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
|
||||
maxAlloc = xmlFuzzReadInt(4) % (size * 50 + 10);
|
||||
xmlFuzzMemSetLimit(maxAlloc);
|
||||
|
||||
/*
|
||||
* Interpreter loop
|
||||
*
|
||||
* Processing an opcode typically involves
|
||||
*
|
||||
* - startOp for debugging
|
||||
* - increase output register index if non-void
|
||||
* - get arguments from input registers
|
||||
* - invoke API function
|
||||
* - set oomReport
|
||||
* - set output register
|
||||
* - memory management and other adjustments
|
||||
* - endOp for void functions
|
||||
*/
|
||||
|
||||
while (xmlFuzzBytesRemaining()) {
|
||||
size_t readSize;
|
||||
int op = xmlFuzzReadInt(1);
|
||||
int oomReport = -1;
|
||||
int oomReport = -1; /* -1 means unknown */
|
||||
|
||||
vars->opName = "[unset]";
|
||||
|
||||
@ -996,6 +1024,14 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
|
||||
break;
|
||||
|
||||
case OP_PARSE_DOCUMENT:
|
||||
/*
|
||||
* We don't really want to test the parser but exposing
|
||||
* xmlReadDoc seems like a useful way to generate or
|
||||
* round-trip documents.
|
||||
*
|
||||
* This also creates documents with a dictionary which
|
||||
* is crucial to hit some code paths.
|
||||
*/
|
||||
startOp("xmlReadDoc");
|
||||
incNodeIdx();
|
||||
setNode(0, (xmlNodePtr) xmlReadDoc(
|
||||
@ -1008,6 +1044,11 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
|
||||
case OP_XML_NEW_DOC: {
|
||||
xmlDocPtr doc;
|
||||
|
||||
/*
|
||||
* TODO: There's no public API function to generate a
|
||||
* document with a dictionary. We should add an extra
|
||||
* opcode that sets doc->dict.
|
||||
*/
|
||||
startOp("xmlNewDoc");
|
||||
incNodeIdx();
|
||||
doc = xmlNewDoc(getStr(0));
|
||||
|
Loading…
Reference in New Issue
Block a user