1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-12-24 21:33:51 +03:00

extended xmlRegExecErrInfo() and xmlRegExecNextValues() to return error

* include/libxml/xmlregexp.h xmlregexp.c: extended xmlRegExecErrInfo()
  and xmlRegExecNextValues() to return error transition strings too,
  and sink state detection and handling.
Daniel
This commit is contained in:
Daniel Veillard 2005-01-12 13:21:17 +00:00
parent 618732b607
commit cc026dc6b0
3 changed files with 103 additions and 20 deletions

View File

@ -1,3 +1,9 @@
Wed Jan 12 14:17:14 CET 2005 Daniel Veillard <daniel@veillard.com>
* include/libxml/xmlregexp.h xmlregexp.c: extended xmlRegExecErrInfo()
and xmlRegExecNextValues() to return error transition strings too,
and sink state detection and handling.
Tue Jan 11 14:41:47 CET 2005 Daniel Veillard <daniel@veillard.com>
* xmlschemas.c: fixed bug #163641 when the value passed for

View File

@ -89,12 +89,14 @@ XMLPUBFUN int XMLCALL
XMLPUBFUN int XMLCALL
xmlRegExecNextValues(xmlRegExecCtxtPtr exec,
int *nbval,
int *nbneg,
xmlChar **values,
int *terminal);
XMLPUBFUN int XMLCALL
xmlRegExecErrInfo (xmlRegExecCtxtPtr exec,
const xmlChar **string,
int *nbval,
int *nbneg,
xmlChar **values,
int *terminal);
#ifdef __cplusplus

View File

@ -139,7 +139,8 @@ typedef enum {
typedef enum {
XML_REGEXP_START_STATE = 1,
XML_REGEXP_FINAL_STATE,
XML_REGEXP_TRANS_STATE
XML_REGEXP_TRANS_STATE,
XML_REGEXP_SINK_STATE
} xmlRegStateType;
typedef enum {
@ -207,7 +208,6 @@ struct _xmlAutomataState {
xmlRegMarkedType mark;
xmlRegMarkedType reached;
int no;
int maxTrans;
int nbTrans;
xmlRegTrans *trans;
@ -1596,11 +1596,16 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
/*
* build the completed transitions bypassing the epsilons
* Use a marking algorithm to avoid loops
* mark sink states too.
*/
for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
state = ctxt->states[statenr];
if (state == NULL)
continue;
if ((state->nbTrans == 0) &&
(state->type != XML_REGEXP_FINAL_STATE)) {
state->type = XML_REGEXP_SINK_STATE;
}
for (transnr = 0;transnr < state->nbTrans;transnr++) {
if ((state->trans[transnr].atom == NULL) &&
(state->trans[transnr].to >= 0)) {
@ -1677,6 +1682,7 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
}
}
}
/*
* find the next accessible state not explored
*/
@ -2697,6 +2703,10 @@ xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
#ifdef DEBUG_PUSH
printf("entering state %d\n", target);
#endif
if (comp->compact[target * (comp->nbstrings + 1)] ==
XML_REGEXP_SINK_STATE)
goto error;
if (comp->compact[target * (comp->nbstrings + 1)] ==
XML_REGEXP_FINAL_STATE)
return(1);
@ -2711,6 +2721,7 @@ xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
#ifdef DEBUG_PUSH
printf("failed to find a transition for %s on state %d\n", value, state);
#endif
error:
if (exec->errString != NULL)
xmlFree(exec->errString);
exec->errString = xmlStrdup(value);
@ -2975,6 +2986,20 @@ xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
#ifdef DEBUG_PUSH
printf("entering state %d\n", trans->to);
#endif
if ((exec->comp->states[trans->to] != NULL) &&
(exec->comp->states[trans->to]->type ==
XML_REGEXP_SINK_STATE)) {
/*
* entering a sink state, save the current state as error
* state.
*/
if (exec->errString != NULL)
xmlFree(exec->errString);
exec->errString = xmlStrdup(value);
exec->errState = exec->state;
memcpy(exec->errCounts, exec->counts,
exec->comp->nbCounters * sizeof(int));
}
exec->state = exec->comp->states[trans->to];
exec->transno = 0;
if (trans->atom != NULL) {
@ -3010,10 +3035,11 @@ xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
rollback:
/*
* if we didn't yet rollback on the current input store
* the current state as the error state.
* if we didn't yet rollback on the current input
* store the current state as the error state.
*/
if (progress) {
if ((progress) && (exec->state != NULL) &&
(exec->state->type != XML_REGEXP_SINK_STATE)) {
progress = 0;
if (exec->errString != NULL)
xmlFree(exec->errString);
@ -3113,6 +3139,7 @@ xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
* @exec: a regexp execution context
* @err: error extraction or normal one
* @nbval: pointer to the number of accepted values IN/OUT
* @nbneg: return number of negative transitions
* @values: pointer to the array of acceptable values
* @terminal: return value if this was a terminal state
*
@ -3123,14 +3150,18 @@ xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
*/
static int
xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
int *nbval, xmlChar **values, int *terminal) {
int *nbval, int *nbneg,
xmlChar **values, int *terminal) {
int maxval;
int nb = 0;
if ((exec == NULL) || (nbval == NULL) || (values == NULL) || (*nbval <= 0))
if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
(values == NULL) || (*nbval <= 0))
return(-1);
maxval = *nbval;
*nbval = 0;
*nbneg = 0;
if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
xmlRegexpPtr comp;
int target, i, state;
@ -3150,13 +3181,24 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
else
*terminal = 0;
}
for (i = 0;(i < comp->nbstrings) && (*nbval < maxval);i++) {
for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
if ((target > 0) && (target <= comp->nbstates)) {
values[*nbval] = comp->stringMap[i];
if ((target > 0) && (target <= comp->nbstates) &&
(comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
XML_REGEXP_SINK_STATE)) {
values[nb++] = comp->stringMap[i];
(*nbval)++;
}
}
for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
if ((target > 0) && (target <= comp->nbstates) &&
(comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
XML_REGEXP_SINK_STATE)) {
values[nb++] = comp->stringMap[i];
(*nbneg)++;
}
}
} else {
int transno;
xmlRegTransPtr trans;
@ -3178,7 +3220,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
state = exec->state;
}
for (transno = 0;
(transno < state->nbTrans) && (*nbval < maxval);
(transno < state->nbTrans) && (nb < maxval);
transno++) {
trans = &state->trans[transno];
if (trans->to < 0)
@ -3187,8 +3229,10 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
if ((atom == NULL) || (atom->valuep == NULL))
continue;
if (trans->count == REGEXP_ALL_LAX_COUNTER) {
/* this should not be reached but ... */
TODO;
} else if (trans->count == REGEXP_ALL_COUNTER) {
/* this should not be reached but ... */
TODO;
} else if (trans->counter >= 0) {
xmlRegCounterPtr counter;
@ -3200,12 +3244,40 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
count = exec->counts[trans->counter];
counter = &exec->comp->counters[trans->counter];
if (count < counter->max) {
values[*nbval] = (xmlChar *) atom->valuep;
values[nb++] = (xmlChar *) atom->valuep;
(*nbval)++;
}
} else {
values[*nbval] = (xmlChar *) atom->valuep;
(*nbval)++;
if ((exec->comp->states[trans->to] != NULL) &&
(exec->comp->states[trans->to]->type !=
XML_REGEXP_SINK_STATE)) {
values[nb++] = (xmlChar *) atom->valuep;
(*nbval)++;
}
}
}
for (transno = 0;
(transno < state->nbTrans) && (nb < maxval);
transno++) {
trans = &state->trans[transno];
if (trans->to < 0)
continue;
atom = trans->atom;
if ((atom == NULL) || (atom->valuep == NULL))
continue;
if (trans->count == REGEXP_ALL_LAX_COUNTER) {
continue;
} else if (trans->count == REGEXP_ALL_COUNTER) {
continue;
} else if (trans->counter >= 0) {
continue;
} else {
if ((exec->comp->states[trans->to] != NULL) &&
(exec->comp->states[trans->to]->type ==
XML_REGEXP_SINK_STATE)) {
values[nb++] = (xmlChar *) atom->valuep;
(*nbneg)++;
}
}
}
}
@ -3216,6 +3288,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
* xmlRegExecNextValues:
* @exec: a regexp execution context
* @nbval: pointer to the number of accepted values IN/OUT
* @nbneg: return number of negative transitions
* @values: pointer to the array of acceptable values
* @terminal: return value if this was a terminal state
*
@ -3229,9 +3302,9 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
* Returns: 0 in case of success or -1 in case of error.
*/
int
xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, xmlChar **values,
int *terminal) {
return(xmlRegExecGetValues(exec, 0, nbval, values, terminal));
xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
xmlChar **values, int *terminal) {
return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
}
/**
@ -3239,6 +3312,7 @@ xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, xmlChar **values,
* @exec: a regexp execution context generating an error
* @string: return value for the error string
* @nbval: pointer to the number of accepted values IN/OUT
* @nbneg: return number of negative transitions
* @values: pointer to the array of acceptable values
* @terminal: return value if this was a terminal state
*
@ -3254,7 +3328,7 @@ xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, xmlChar **values,
*/
int
xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
int *nbval, xmlChar **values, int *terminal) {
int *nbval, int *nbneg, xmlChar **values, int *terminal) {
if (exec == NULL)
return(-1);
if (string != NULL) {
@ -3263,7 +3337,7 @@ xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
else
*string = NULL;
}
return(xmlRegExecGetValues(exec, 1, nbval, values, terminal));
return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
}
#ifdef DEBUG_ERR
@ -3271,8 +3345,9 @@ static void testerr(xmlRegExecCtxtPtr exec) {
const xmlChar *string;
const xmlChar *values[5];
int nb = 5;
int nbneg;
int terminal;
xmlRegExecErrInfo(exec, &string, &nb, &values[0], &terminal);
xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
}
#endif