1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-01-27 14:03:36 +03:00

fuzz: Abort on invalid UTF-8

The parser should never generate invalid UTF-8 these days, even in
recovery mode.
This commit is contained in:
Nick Wellnhofer 2024-01-04 21:20:51 +01:00
parent df098e3bf6
commit c2b3294f60
2 changed files with 21 additions and 8 deletions

View File

@ -652,13 +652,18 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
l = 4;
val = xmlGetUTF8Char(cur, &l);
if ((val < 0) || (!IS_CHAR(val))) {
snprintf(buf, sizeof(buf), "&#%d;", *cur);
buf[sizeof(buf) - 1] = 0;
ptr = buf;
while (*ptr != 0) *out++ = *ptr++;
cur++;
continue;
if (val < 0) {
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
fprintf(stderr, "xmlEncodeEntitiesInternal: "
"invalid UTF-8\n");
abort();
#endif
val = 0xFFFD;
cur++;
} else {
if (!IS_CHAR(val))
val = 0xFFFD;
cur += l;
}
/*
* We could do multiple things here. Just save as a char ref
@ -667,7 +672,6 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
buf[sizeof(buf) - 1] = 0;
ptr = buf;
while (*ptr != 0) *out++ = *ptr++;
cur += l;
continue;
}
} else if (IS_BYTE_CHAR(*cur)) {

View File

@ -9,6 +9,7 @@
#define IN_LIBXML
#include "libxml.h"
#include <stdlib.h>
#include <string.h>
#include <libxml/xmlmemory.h>
#include <libxml/parserInternals.h>
@ -244,6 +245,10 @@ xmlEscapeEntities(unsigned char* out, int *outlen,
val = xmlGetUTF8Char(in, &len);
if (val < 0) {
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
fprintf(stderr, "xmlEscapeEntities: invalid UTF-8\n");
abort();
#endif
val = 0xFFFD;
in++;
} else {
@ -2045,6 +2050,10 @@ xmlBufAttrSerializeTxtContent(xmlBufPtr buf, xmlDocPtr doc,
val = xmlGetUTF8Char(cur, &l);
if (val < 0) {
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
fprintf(stderr, "xmlEscapeEntities: invalid UTF-8\n");
abort();
#endif
val = 0xFFFD;
cur++;
} else {