From 68eadabd0055cba39c4ea1acfa8931d0d10a44e5 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sat, 11 Jul 2020 21:32:10 +0200 Subject: [PATCH] Fix exponential runtime in xmlFARecurseDeterminism In order to prevent visiting a state twice, states must be marked as visited for the whole duration of graph traversal because states might be reached by different paths. Otherwise state graphs like the following can lead to exponential runtime: ->O-->O-->O-->O-->O-> \ / \ / \ / \ / O O O O Reset the "visited" flag only after the graph was traversed. xmlFAComputesDeterminism still has massive performance problems when handling fuzzed input. By design, it has quadratic time complexity in the number of reachable states. Some issues might also stem from redundant epsilon transitions. With this fix, fuzzing regexes with a maximum length of 100 becomes feasible at least. Found with libFuzzer. --- xmlregexp.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/xmlregexp.c b/xmlregexp.c index dbf3bf2c..f971f0c8 100644 --- a/xmlregexp.c +++ b/xmlregexp.c @@ -2658,7 +2658,6 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, state->markd = XML_REGEXP_MARK_VISITED; res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], to, atom); - state->markd = 0; if (res == 0) { ret = 0; /* t1->nd = 1; */ @@ -2676,6 +2675,30 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, return(ret); } +/** + * xmlFAFinishRecurseDeterminism: + * @ctxt: a regexp parser context + * + * Reset flags after checking determinism. + */ +static void +xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) { + int transnr, nbTrans; + + if (state == NULL) + return; + if (state->markd != XML_REGEXP_MARK_VISITED) + return; + state->markd = 0; + + nbTrans = state->nbTrans; + for (transnr = 0; transnr < nbTrans; transnr++) { + xmlRegTransPtr t1 = &state->trans[transnr]; + if ((t1->atom == NULL) && (t1->to >= 0)) + xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]); + } +} + /** * xmlFAComputesDeterminism: * @ctxt: a regexp parser context @@ -2789,6 +2812,7 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) { */ ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], t2->to, t2->atom); + xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]); /* don't shortcut the computation so all non deterministic transition get marked down if (ret == 0)