1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-10-26 20:25:14 +03:00

Fix exponential runtime in xmlFARecurseDeterminism

In order to prevent visiting a state twice, states must be marked as
visited for the whole duration of graph traversal because states might
be reached by different paths. Otherwise state graphs like the
following can lead to exponential runtime:

  ->O-->O-->O-->O-->O->
     \ / \ / \ / \ /
      O   O   O   O

Reset the "visited" flag only after the whole graph has been traversed.

xmlFAComputesDeterminism still has massive performance problems when
handling fuzzed input. By design, it has quadratic time complexity in
the number of reachable states. Some issues might also stem from
redundant epsilon transitions. With this fix, fuzzing regexes with a
maximum length of 100 at least becomes feasible.

Found with libFuzzer.
This commit is contained in:
Nick Wellnhofer 2020-07-11 21:32:10 +02:00
parent 1a360c1c2e
commit 68eadabd00

View File

@ -2658,7 +2658,6 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
state->markd = XML_REGEXP_MARK_VISITED; state->markd = XML_REGEXP_MARK_VISITED;
res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
to, atom); to, atom);
state->markd = 0;
if (res == 0) { if (res == 0) {
ret = 0; ret = 0;
/* t1->nd = 1; */ /* t1->nd = 1; */
@ -2676,6 +2675,30 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
return(ret); return(ret);
} }
/**
 * xmlFAFinishRecurseDeterminism:
 * @ctxt:  a regexp parser context
 * @state:  the state at which to start clearing marks (may be NULL)
 *
 * Clear the XML_REGEXP_MARK_VISITED mark on @state and, recursively, on
 * every state reachable from it through transitions that carry no atom.
 * States that are NULL or not currently marked as visited are left alone,
 * which also stops the recursion on cycles.
 */
static void
xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
    int idx;
    int count;

    /* Nothing to undo for a missing state or one that is not marked. */
    if ((state == NULL) || (state->markd != XML_REGEXP_MARK_VISITED))
        return;

    /* Unmark before descending so a cycle back to this state bails out. */
    state->markd = 0;

    count = state->nbTrans;
    for (idx = 0; idx < count; idx++) {
        xmlRegTransPtr trans = &state->trans[idx];

        /* Only follow atom-less transitions (presumably epsilon moves). */
        if (trans->atom != NULL)
            continue;
        /* A negative target is not an index into ctxt->states. */
        if (trans->to < 0)
            continue;

        xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[trans->to]);
    }
}
/** /**
* xmlFAComputesDeterminism: * xmlFAComputesDeterminism:
* @ctxt: a regexp parser context * @ctxt: a regexp parser context
@ -2789,6 +2812,7 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
*/ */
ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
t2->to, t2->atom); t2->to, t2->atom);
xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
/* don't shortcut the computation so all non deterministic /* don't shortcut the computation so all non deterministic
transition get marked down transition get marked down
if (ret == 0) if (ret == 0)