1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-10-26 12:25:09 +03:00

fuzz: Add a few more comments

This commit is contained in:
Nick Wellnhofer 2024-04-02 23:19:28 +02:00
parent 5bb84b47b8
commit 1f18d37798

View File

@ -1,22 +1,35 @@
/*
* api.c: a libFuzzer target to test all kinds of API functions.
* api.c: a libFuzzer target to test node-related API functions.
*
* See Copyright for the status of this software.
*
* This is a simple virtual machine which runs fuzz data as a program.
* An important design goal is to execute as many API calls as possible
* per input byte.
*
* There is a fixed number of registers for basic types like integers
* or strings as well as libxml2 objects like xmlNode. An opcode
* typically results in a call to an API function using the freshest
* registers for each argument type and storing the result in the
* stalest register. This can be implemented using a ring buffer.
* We use a fixed number of registers for basic types like integers
* or strings as well as libxml2 objects like xmlNode. The opcodes are
* single bytes which typically result in a call to an API function
* using the freshest registers for each argument type and storing the
* result in the stalest register. This can be implemented using a ring
* buffer.
*
* There are a few other opcodes to initialize or duplicate registers,
* so all kinds of API calls can potentially be generated from
* fuzz data.
* so all kinds of API calls can potentially be generated from fuzz
* data.
*
* TODO:
* - Create documents with a dictionary.
* This architecture is similar to a stack machine and benefits from
* great code density. The main difference is that values aren't
* destroyed when popping arguments from the stack and that the bottom
* of the stack is eventually overwritten if the ring buffer overflows.
*
* The main complication is memory management of nodes. Whenever a
* reference between two nodes is removed, whether by an API call or
* the VM clearing a register, we must check whether this leaves
* unreferenced nodes which can then be freed. There are no opcodes
* to free a node explicitly. The FIFO patterns generated by
* overflowing the ring buffer and freeing the registers at the end of
* a program seem to do a good enough job.
*/
#include <stdlib.h>
@ -672,7 +685,7 @@ dropNode(xmlNodePtr node) {
/*
* removeNode and removeChildren remove all references to a node
* or its children from the registers. These functions should be
* called in an API function destroys nodes, for example by merging
* called if an API function destroys nodes, for example by merging
* text nodes.
*/
@ -971,10 +984,25 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
maxAlloc = xmlFuzzReadInt(4) % (size * 50 + 10);
xmlFuzzMemSetLimit(maxAlloc);
/*
* Interpreter loop
*
* Processing an opcode typically involves
*
* - startOp for debugging
* - increase output register index if non-void
* - get arguments from input registers
* - invoke API function
* - set oomReport
* - set output register
* - memory management and other adjustments
* - endOp for void functions
*/
while (xmlFuzzBytesRemaining()) {
size_t readSize;
int op = xmlFuzzReadInt(1);
int oomReport = -1;
int oomReport = -1; /* -1 means unknown */
vars->opName = "[unset]";
@ -996,6 +1024,14 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
break;
case OP_PARSE_DOCUMENT:
/*
* We don't really want to test the parser but exposing
* xmlReadDoc seems like a useful way to generate or
* round-trip documents.
*
* This also creates documents with a dictionary which
* is crucial to hit some code paths.
*/
startOp("xmlReadDoc");
incNodeIdx();
setNode(0, (xmlNodePtr) xmlReadDoc(
@ -1008,6 +1044,11 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
case OP_XML_NEW_DOC: {
xmlDocPtr doc;
/*
* TODO: There's no public API function to generate a
* document with a dictionary. We should add an extra
* opcode that sets doc->dict.
*/
startOp("xmlNewDoc");
incNodeIdx();
doc = xmlNewDoc(getStr(0));