1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-03-27 18:50:07 +03:00

xmlregexp: add support for compact form of automata in xmlRegexpPrint

This commit is contained in:
Florin Haja 2025-02-22 19:29:07 +00:00 committed by Nick Wellnhofer
parent c82270a9a7
commit 4649f28f77

View File

@ -1099,6 +1099,21 @@ xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
}
}
/*
 * xmlRegPrintAtomCompact
 * @output an output stream
 * @regexp the regexp instance
 * @atom index of the atom inside regexp->stringMap
 *
 * Print one atom of an automaton stored in compact form. The atom type and
 * quantifier are always printed as XML_REGEXP_STRING and
 * XML_REGEXP_QUANT_ONCE; the atom value is read from regexp->stringMap.
 * Nothing is printed when an argument is NULL, when the string map is
 * missing or when @atom is outside [0, regexp->nbstrings).
 */
static void
xmlRegPrintAtomCompact(FILE* output, xmlRegexpPtr regexp, int atom)
{
    /* stringMap is dereferenced below, so it is validated together with
     * the other arguments. */
    if (output == NULL || regexp == NULL || regexp->stringMap == NULL ||
        atom < 0 || atom >= regexp->nbstrings) {
        return;
    }
    fprintf(output, " atom: ");
    xmlRegPrintAtomType(output, XML_REGEXP_STRING);
    xmlRegPrintQuantType(output, XML_REGEXP_QUANT_ONCE);
    fprintf(output, "'%s' \n", (char *) regexp->stringMap[atom]);
}
static void
xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
fprintf(output, " trans: ");
@ -1133,6 +1148,59 @@ xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
}
/*
 * xmlRegPrintTransCompact
 * @output an output stream
 * @regexp the regexp instance, in compact form
 * @state index of the source state (row of the compact table)
 * @atom index of the atom (column of the compact table, before the +1 shift)
 *
 * Print the transition leaving @state on @atom. Nothing is printed when an
 * argument is NULL, when the regexp has no compact table or when @state or
 * @atom is out of range.
 */
static void
xmlRegPrintTransCompact(
    FILE* output,
    xmlRegexpPtr regexp,
    int state,
    int atom
)
{
    int target;

    /* Both indexes are range-checked: they are used to address the compact
     * table directly. */
    if (output == NULL || regexp == NULL || regexp->compact == NULL ||
        state < 0 || state >= regexp->nbstates ||
        atom < 0 || atom >= regexp->nbstrings) {
        return;
    }

    /* Each row holds (nbstrings + 1) cells: cell 0 is the state type, cell
     * (atom + 1) is the transition for that atom. */
    target = regexp->compact[state * (regexp->nbstrings + 1) + atom + 1];
    fprintf(output, " trans: ");

    /* Cells store the target state index plus one, so that 0 can mean
     * "no transition"; anything outside [1, nbstates] is not a usable
     * target.
     * TODO maybe skip 'removed' transitions entirely, because they never
     * actually existed. */
    if (target <= 0 || target > regexp->nbstates) {
        fprintf(output, "removed\n");
        return;
    }

    /* Most of the attributes printed by xmlRegPrintTrans are ignored here:
     * the compact form is much simpler and uses only a part of the features
     * of the regexp library. It is always deterministic (trans->nd would be
     * 0), it uses no counters (regexp->counters == NULL) and its atoms are
     * always of type XML_REGEXP_STRING, so only the atom and the target
     * state remain to be printed. */
    fprintf(output, "atom %d, to %d\n", atom, target - 1);
}
static void
xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
int i;
@ -1153,6 +1221,87 @@ xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
}
}
/*
 * xmlRegPrintStateCompact
 * @output an output stream
 * @regexp the regexp instance, in compact form
 * @state index of the state (row of the compact table)
 *
 * Print one state of a compact automaton: its START/FINAL marker, the
 * atoms, the state number with its transition count, then one line per
 * atom describing the transition. Nothing is printed when an argument is
 * NULL, when the regexp has no compact table or when @state is outside
 * [0, regexp->nbstates).
 */
static void
xmlRegPrintStateCompact(FILE* output, xmlRegexpPtr regexp, int state)
{
    int nbTrans = 0;
    int i;
    int rowStart;
    int target;
    xmlRegStateType stateType;

    if (output == NULL || regexp == NULL || regexp->compact == NULL ||
        state < 0 || state >= regexp->nbstates) {
        return;
    }

    /* Each row holds (nbstrings + 1) cells: the state type followed by one
     * transition cell per atom. */
    rowStart = state * (regexp->nbstrings + 1);

    fprintf(output, " state: ");
    stateType = regexp->compact[rowStart];
    if (stateType == XML_REGEXP_START_STATE) {
        fprintf(output, " START ");
    }
    if (stateType == XML_REGEXP_FINAL_STATE) {
        fprintf(output, " FINAL ");
    }

    /* Print all atoms. */
    for (i = 0; i < regexp->nbstrings; i++) {
        xmlRegPrintAtomCompact(output, regexp, i);
    }

    /* Count all the transitions from the compact representation. A cell
     * holds the target state index plus one; 0 means "no transition". */
    for (i = 0; i < regexp->nbstrings; i++) {
        target = regexp->compact[rowStart + i + 1];
        if (target > 0 && target <= regexp->nbstates) {
            nbTrans++;
        }
    }
    fprintf(output, "%d, %d transitions:\n", state, nbTrans);

    /* Print all transitions */
    for (i = 0; i < regexp->nbstrings; i++) {
        xmlRegPrintTransCompact(output, regexp, state, i);
    }
}
/*
 * xmlRegPrintCompact
 * @output an output stream
 * @regexp the regexp instance
 *
 * Print the compact representation of a regexp, in the same fashion as the
 * public xmlRegexpPrint function: the regexp string, the list of atoms,
 * the list of states and the (always empty) list of counters. Nothing is
 * printed when an argument is NULL or the regexp has no compact table.
 */
static void
xmlRegPrintCompact(FILE* output, xmlRegexpPtr regexp)
{
    int i;

    if (output == NULL || regexp == NULL || regexp->compact == NULL) {
        return;
    }

    /* Same header layout as xmlRegexpPrint: the regexp string on its own
     * line, followed by the atom count. */
    fprintf(output, "'%s' ", (char *) regexp->string);
    fprintf(output, "\n");
    fprintf(output, "%d atoms:\n", regexp->nbstrings);
    for (i = 0; i < regexp->nbstrings; i++) {
        fprintf(output, " %02d ", i);
        xmlRegPrintAtomCompact(output, regexp, i);
    }

    fprintf(output, "%d states:", regexp->nbstates);
    fprintf(output, "\n");
    for (i = 0; i < regexp->nbstates; i++) {
        xmlRegPrintStateCompact(output, regexp, i);
    }

    /* The counter count is hard-coded to 0 for the compact form. */
    fprintf(output, "%d counters:\n", 0);
}
/************************************************************************
* *
* Finite Automata structures manipulations *
@ -5223,6 +5372,11 @@ xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
fprintf(output, "NULL\n");
return;
}
if (regexp->compact) {
xmlRegPrintCompact(output, regexp);
return;
}
fprintf(output, "'%s' ", regexp->string);
fprintf(output, "\n");
fprintf(output, "%d atoms:\n", regexp->nbAtoms);