1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-02-02 21:47:01 +03:00

Fix quadratic runtime when push parsing HTML entity refs

The HTML push parser would look ahead for characters in "; >/" to
terminate an entity reference but actual parsing could stop earlier,
potentially resulting in quadratic runtime.

Parse char data and references alternately in htmlParseTryOrFinish
and only look ahead once for a terminating '<' character.

Found by OSS-Fuzz.
This commit is contained in:
Nick Wellnhofer 2020-07-19 13:54:52 +02:00
parent 8e219b154e
commit 6995eed077

View File

@ -5249,61 +5249,6 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
return (-1); return (-1);
} }
/**
 * htmlParseLookupChars:
 * @ctxt:  an HTML parser context
 * @stop:  array of characters, any one of which ends the lookup
 * @stopLen:  number of entries in @stop
 *
 * Scan the buffered input for the first byte equal to one of the
 * characters in @stop. Scanning starts at the current parsing point, or
 * at ctxt->checkIndex when that is further ahead, so bytes examined by a
 * previous unsuccessful call are not examined again.
 *
 * Side effect: ctxt->checkIndex is reset to 0 when a stop character is
 * found, and set to the offset where scanning ended otherwise. Because
 * this alters parser state, callers should use it sparingly.
 *
 * Returns the offset of the matching byte relative to the current parsing
 * point, or -1 if no stop character is available.
 */
static int
htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
                     int stopLen)
{
    htmlParserInputPtr input = ctxt->input;
    const xmlChar *data;
    int pos, avail;
    int k;

    if (input == NULL)
        return (-1);

    pos = input->cur - input->base;
    if (pos < 0)
        return (-1);

    /* Resume where an earlier, unsuccessful scan left off. */
    if (pos < ctxt->checkIndex)
        pos = ctxt->checkIndex;

    if (input->buf != NULL) {
        data = xmlBufContent(input->buf->buffer);
        avail = xmlBufUse(input->buf->buffer);
    } else {
        data = input->base;
        avail = input->length;
    }

    while (pos < avail) {
        k = 0;
        while (k < stopLen) {
            if (stop[k] == data[pos]) {
                /* Match: clear the resume marker and report the distance. */
                ctxt->checkIndex = 0;
                return (pos - (input->cur - input->base));
            }
            ++k;
        }
        pos++;
    }

    /* Nothing found: remember how far we got for the next call. */
    ctxt->checkIndex = pos;
    return (-1);
}
/** /**
* htmlParseTryOrFinish: * htmlParseTryOrFinish:
* @ctxt: an HTML parser context * @ctxt: an HTML parser context
@ -5893,17 +5838,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
"HPP: entering START_TAG\n"); "HPP: entering START_TAG\n");
#endif #endif
break; break;
} else if (cur == '&') {
if ((!terminate) &&
(htmlParseLookupChars(ctxt,
BAD_CAST "; >/", 4) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"HPP: Parsing Reference\n");
#endif
/* TODO: check generation of subtrees if noent !!! */
htmlParseReference(ctxt);
} else { } else {
/* /*
* check that the text sequence is complete * check that the text sequence is complete
@ -5912,14 +5846,21 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
* data detection. * data detection.
*/ */
if ((!terminate) && if ((!terminate) &&
(htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0)) (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))
goto done; goto done;
ctxt->checkIndex = 0; ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH #ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
"HPP: Parsing char data\n"); "HPP: Parsing char data\n");
#endif #endif
htmlParseCharData(ctxt); while ((cur != '<') && (cur != 0)) {
if (cur == '&') {
htmlParseReference(ctxt);
} else {
htmlParseCharData(ctxt);
}
cur = in->cur[0];
}
} }
} }
if (cons == ctxt->nbChars) { if (cons == ctxt->nbChars) {