1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-03-20 18:50:08 +03:00

Fix pathological performance when outputting charrefs

If a character can't be represented in the output encoding, it is
converted to a character reference. This used to to replace the
character in the input stream by calling xmlBufAddHead or
xmlBufferAddHead. These functions shifted the entire input array
around, leading to quadratic performance when converting a run of
non-representable characters. This is most pronounced when dumping to
memory.

Output the charref directly instead.

Found with libFuzzer.
This commit is contained in:
Nick Wellnhofer 2017-06-19 15:32:56 +02:00
parent c9ccbd6a6d
commit e5107772ff

View File

@ -2395,7 +2395,6 @@ xmlCharEncOutput(xmlOutputBufferPtr output, int init)
int c_out;
xmlBufPtr in;
xmlBufPtr out;
int charref_len = 0;
if ((output == NULL) || (output->encoder == NULL) ||
(output->buffer == NULL) || (output->conv == NULL))
@ -2451,7 +2450,6 @@ retry:
if (ret == -1) {
if (c_out > 0) {
/* Can be a limitation of iconv or uconv */
charref_len = 0;
goto retry;
}
ret = -3;
@ -2488,46 +2486,38 @@ retry:
ret = -1;
break;
case -2: {
xmlChar charref[20];
int len = (int) xmlBufUse(in);
xmlChar *content = xmlBufContent(in);
int cur;
int cur, charrefLen;
cur = xmlGetUTF8Char(content, &len);
if ((charref_len != 0) && (c_out < charref_len)) {
/*
* We attempted to insert a character reference and failed.
* Undo what was written and skip the remaining charref.
*/
xmlBufErase(out, c_out);
writtentot -= c_out;
xmlBufShrink(in, charref_len - c_out);
charref_len = 0;
ret = -1;
if (cur <= 0)
break;
} else if (cur > 0) {
xmlChar charref[20];
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"handling output conversion error\n");
xmlGenericError(xmlGenericErrorContext,
"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
content[0], content[1],
content[2], content[3]);
xmlGenericError(xmlGenericErrorContext,
"handling output conversion error\n");
xmlGenericError(xmlGenericErrorContext,
"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
content[0], content[1],
content[2], content[3]);
#endif
/*
* Removes the UTF8 sequence, and replace it by a charref
* and continue the transcoding phase, hoping the error
* did not mangle the encoder state.
*/
charref_len = snprintf((char *) &charref[0], sizeof(charref),
"&#%d;", cur);
xmlBufShrink(in, len);
xmlBufAddHead(in, charref, -1);
/*
* Removes the UTF8 sequence, and replace it by a charref
* and continue the transcoding phase, hoping the error
* did not mangle the encoder state.
*/
charrefLen = snprintf((char *) &charref[0], sizeof(charref),
"&#%d;", cur);
xmlBufShrink(in, len);
xmlBufGrow(out, charrefLen * 4);
c_out = xmlBufAvail(out) - 1;
c_in = charrefLen;
ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
charref, &c_in);
goto retry;
} else {
if ((ret < 0) || (c_in != charrefLen)) {
char buf[50];
snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
@ -2539,8 +2529,12 @@ retry:
buf);
if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
content[0] = ' ';
break;
}
break;
xmlBufAddLen(out, c_out);
writtentot += c_out;
goto retry;
}
}
return(ret);
@ -2573,7 +2567,6 @@ xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
int writtentot = 0;
int toconv;
int output = 0;
int charref_len = 0;
if (handler == NULL) return(-1);
if (out == NULL) return(-1);
@ -2621,7 +2614,6 @@ retry:
if (ret == -1) {
if (written > 0) {
/* Can be a limitation of iconv or uconv */
charref_len = 0;
goto retry;
}
ret = -3;
@ -2658,46 +2650,38 @@ retry:
ret = -1;
break;
case -2: {
xmlChar charref[20];
int len = in->use;
const xmlChar *utf = (const xmlChar *) in->content;
int cur;
int cur, charrefLen;
cur = xmlGetUTF8Char(utf, &len);
if ((charref_len != 0) && (written < charref_len)) {
/*
* We attempted to insert a character reference and failed.
* Undo what was written and skip the remaining charref.
*/
out->use -= written;
writtentot -= written;
xmlBufferShrink(in, charref_len - written);
charref_len = 0;
ret = -1;
if (cur <= 0)
break;
} else if (cur > 0) {
xmlChar charref[20];
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"handling output conversion error\n");
xmlGenericError(xmlGenericErrorContext,
"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
in->content[0], in->content[1],
in->content[2], in->content[3]);
xmlGenericError(xmlGenericErrorContext,
"handling output conversion error\n");
xmlGenericError(xmlGenericErrorContext,
"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
in->content[0], in->content[1],
in->content[2], in->content[3]);
#endif
/*
* Removes the UTF8 sequence, and replace it by a charref
* and continue the transcoding phase, hoping the error
* did not mangle the encoder state.
*/
charref_len = snprintf((char *) &charref[0], sizeof(charref),
"&#%d;", cur);
xmlBufferShrink(in, len);
xmlBufferAddHead(in, charref, -1);
/*
* Removes the UTF8 sequence, and replace it by a charref
* and continue the transcoding phase, hoping the error
* did not mangle the encoder state.
*/
charrefLen = snprintf((char *) &charref[0], sizeof(charref),
"&#%d;", cur);
xmlBufferShrink(in, len);
xmlBufferGrow(out, charrefLen * 4);
written = out->size - out->use - 1;
toconv = charrefLen;
ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
charref, &toconv);
goto retry;
} else {
if ((ret < 0) || (toconv != charrefLen)) {
char buf[50];
snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
@ -2709,8 +2693,13 @@ retry:
buf);
if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
in->content[0] = ' ';
break;
}
break;
out->use += written;
writtentot += written;
out->content[out->use] = 0;
goto retry;
}
}
return(ret);