mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-02-02 21:47:01 +03:00
Fix quadratic runtime when push parsing HTML entity refs
The HTML push parser would look ahead for characters in "; >/" to terminate an entity reference but actual parsing could stop earlier, potentially resulting in quadratic runtime. Parse char data and references alternately in htmlParseTryOrFinish and only look ahead once for a terminating '<' character. Found by OSS-Fuzz.
This commit is contained in:
parent
8e219b154e
commit
6995eed077
77
HTMLparser.c
77
HTMLparser.c
@ -5249,61 +5249,6 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
|
|||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* htmlParseLookupChars:
|
|
||||||
* @ctxt: an HTML parser context
|
|
||||||
* @stop: Array of chars, which stop the lookup.
|
|
||||||
* @stopLen: Length of stop-Array
|
|
||||||
*
|
|
||||||
* Try to find if any char of the stop-Array is available in the input
|
|
||||||
* stream.
|
|
||||||
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
|
|
||||||
* to avoid rescanning sequences of bytes, it DOES change the state of the
|
|
||||||
* parser, do not use liberally.
|
|
||||||
*
|
|
||||||
* Returns the index to the current parsing point if a stopChar
|
|
||||||
* is available, -1 otherwise.
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
|
|
||||||
int stopLen)
|
|
||||||
{
|
|
||||||
int base, len;
|
|
||||||
htmlParserInputPtr in;
|
|
||||||
const xmlChar *buf;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
in = ctxt->input;
|
|
||||||
if (in == NULL)
|
|
||||||
return (-1);
|
|
||||||
|
|
||||||
base = in->cur - in->base;
|
|
||||||
if (base < 0)
|
|
||||||
return (-1);
|
|
||||||
|
|
||||||
if (ctxt->checkIndex > base)
|
|
||||||
base = ctxt->checkIndex;
|
|
||||||
|
|
||||||
if (in->buf == NULL) {
|
|
||||||
buf = in->base;
|
|
||||||
len = in->length;
|
|
||||||
} else {
|
|
||||||
buf = xmlBufContent(in->buf->buffer);
|
|
||||||
len = xmlBufUse(in->buf->buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (; base < len; base++) {
|
|
||||||
for (i = 0; i < stopLen; ++i) {
|
|
||||||
if (buf[base] == stop[i]) {
|
|
||||||
ctxt->checkIndex = 0;
|
|
||||||
return (base - (in->cur - in->base));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ctxt->checkIndex = base;
|
|
||||||
return (-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* htmlParseTryOrFinish:
|
* htmlParseTryOrFinish:
|
||||||
* @ctxt: an HTML parser context
|
* @ctxt: an HTML parser context
|
||||||
@ -5893,17 +5838,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
"HPP: entering START_TAG\n");
|
"HPP: entering START_TAG\n");
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
} else if (cur == '&') {
|
|
||||||
if ((!terminate) &&
|
|
||||||
(htmlParseLookupChars(ctxt,
|
|
||||||
BAD_CAST "; >/", 4) < 0))
|
|
||||||
goto done;
|
|
||||||
#ifdef DEBUG_PUSH
|
|
||||||
xmlGenericError(xmlGenericErrorContext,
|
|
||||||
"HPP: Parsing Reference\n");
|
|
||||||
#endif
|
|
||||||
/* TODO: check generation of subtrees if noent !!! */
|
|
||||||
htmlParseReference(ctxt);
|
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* check that the text sequence is complete
|
* check that the text sequence is complete
|
||||||
@ -5912,14 +5846,21 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
* data detection.
|
* data detection.
|
||||||
*/
|
*/
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0))
|
(htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
ctxt->checkIndex = 0;
|
ctxt->checkIndex = 0;
|
||||||
#ifdef DEBUG_PUSH
|
#ifdef DEBUG_PUSH
|
||||||
xmlGenericError(xmlGenericErrorContext,
|
xmlGenericError(xmlGenericErrorContext,
|
||||||
"HPP: Parsing char data\n");
|
"HPP: Parsing char data\n");
|
||||||
#endif
|
#endif
|
||||||
htmlParseCharData(ctxt);
|
while ((cur != '<') && (cur != 0)) {
|
||||||
|
if (cur == '&') {
|
||||||
|
htmlParseReference(ctxt);
|
||||||
|
} else {
|
||||||
|
htmlParseCharData(ctxt);
|
||||||
|
}
|
||||||
|
cur = in->cur[0];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (cons == ctxt->nbChars) {
|
if (cons == ctxt->nbChars) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user