2002-04-16 19:50:10 +04:00
/*
 * regexp.c: generic and extensible Regular Expression engine
 *
 * Basically designed with the purpose of compiling regexps for
 * the variety of validation/schemas mechanisms now available in
 * XML related specifications; these include:
 *    - XML-1.0 DTD validation
 *    - XML Schemas structure part 1
 *    - XML Schemas Datatypes part 2 especially Appendix F
 *    - RELAX-NG/TREX i.e. the counter proposal
 *
 * See Copyright for the status of this software.
 *
 * Daniel Veillard <veillard@redhat.com>
 */
# define IN_LIBXML
# include "libxml.h"
# ifdef LIBXML_REGEXP_ENABLED
# include <stdio.h>
# include <string.h>
2003-12-03 15:12:27 +03:00
# include <limits.h>
2022-03-02 02:29:17 +03:00
2002-04-16 19:50:10 +04:00
# include <libxml/tree.h>
# include <libxml/parserInternals.h>
# include <libxml/xmlregexp.h>
# include <libxml/xmlautomata.h>
# include <libxml/xmlunicode.h>
2022-08-26 02:22:33 +03:00
# include "private/error.h"
# include "private/regexp.h"
2020-01-02 16:45:28 +03:00
# ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
# endif
2003-12-03 15:12:27 +03:00
2005-10-18 23:11:55 +04:00
/* NOTE(review): presumably the upper bound for the nbPush counter of the
 * execution context - confirm against its users later in the file. */
# define MAX_PUSH 10000000
2005-09-15 17:09:00 +04:00
2012-05-10 16:24:00 +04:00
#ifdef ERROR
#undef ERROR
#endif

/*
 * ERROR:
 * @str:  the error message
 *
 * Flag a compilation error on the parser context named `ctxt` in the
 * calling scope and report @str through xmlRegexpErrCompile().
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: used after an unbraced `if`, both steps are now
 * guarded by the condition instead of only the first one.
 */
#define ERROR(str)                                          \
    do {                                                    \
        ctxt->error = XML_REGEXP_COMPILE_ERROR;             \
        xmlRegexpErrCompile(ctxt, str);                     \
    } while (0)
2002-04-16 19:50:10 +04:00
/*
 * Cursor helpers. Every macro below expands to code that uses a variable
 * named `ctxt` taken from the calling scope.
 */
/* post-increment the cursor by one position */
# define NEXT ctxt->cur++
/* value stored at the cursor */
# define CUR (*(ctxt->cur))
/* value stored `index` positions after the cursor */
# define NXT(index) (ctxt->cur[index])
/* advance the cursor by `l`; note the expansion already ends with ';'
 * and `l` is not parenthesized */
# define NEXTL(l) ctxt->cur += l;
2004-08-10 18:17:33 +04:00
/* NOTE(review): presumably the character separating the parts of a composite
 * string token - confirm against its users later in the file. */
# define XML_REG_STRING_SEPARATOR '|'
2007-03-21 16:16:33 +03:00
/*
 * Need PREV to check on a '-' within a Character Group. May only be used
 * when it's guaranteed that cur is not at the beginning of ctxt->string!
 */
# define PREV (ctxt->cur[-1])
2002-04-16 19:50:10 +04:00
/************************************************************************
2012-09-11 09:26:36 +04:00
* *
* Datatypes and structures *
* *
2002-04-16 19:50:10 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2006-02-12 22:14:15 +03:00
/*
* Note : the order of the enums below is significant , do not shuffle
*/
2002-04-16 19:50:10 +04:00
/*
 * Kind of an atom or of a range inside an atom (see the `type` fields of
 * struct _xmlRegAtom and struct _xmlRegRange).
 * The numeric values are part of the contract: the order is significant,
 * do not shuffle.
 */
typedef enum {
    XML_REGEXP_EPSILON = 1,
    XML_REGEXP_CHARVAL,
    XML_REGEXP_RANGES,
    XML_REGEXP_SUBREG,  /* used for () sub regexps */
    XML_REGEXP_STRING,
    XML_REGEXP_ANYCHAR, /* . */
    XML_REGEXP_ANYSPACE, /* \s */
    XML_REGEXP_NOTSPACE, /* \S */
    XML_REGEXP_INITNAME, /* \l */
    XML_REGEXP_NOTINITNAME, /* \L */
    XML_REGEXP_NAMECHAR, /* \c */
    XML_REGEXP_NOTNAMECHAR, /* \C */
    XML_REGEXP_DECIMAL, /* \d */
    XML_REGEXP_NOTDECIMAL, /* \D */
    XML_REGEXP_REALCHAR, /* \w */
    XML_REGEXP_NOTREALCHAR, /* \W */
    /* second group of values, numbered from 100 */
    XML_REGEXP_LETTER = 100,
    XML_REGEXP_LETTER_UPPERCASE,
    XML_REGEXP_LETTER_LOWERCASE,
    XML_REGEXP_LETTER_TITLECASE,
    XML_REGEXP_LETTER_MODIFIER,
    XML_REGEXP_LETTER_OTHERS,
    XML_REGEXP_MARK,
    XML_REGEXP_MARK_NONSPACING,
    XML_REGEXP_MARK_SPACECOMBINING,
    XML_REGEXP_MARK_ENCLOSING,
    XML_REGEXP_NUMBER,
    XML_REGEXP_NUMBER_DECIMAL,
    XML_REGEXP_NUMBER_LETTER,
    XML_REGEXP_NUMBER_OTHERS,
    XML_REGEXP_PUNCT,
    XML_REGEXP_PUNCT_CONNECTOR,
    XML_REGEXP_PUNCT_DASH,
    XML_REGEXP_PUNCT_OPEN,
    XML_REGEXP_PUNCT_CLOSE,
    XML_REGEXP_PUNCT_INITQUOTE,
    XML_REGEXP_PUNCT_FINQUOTE,
    XML_REGEXP_PUNCT_OTHERS,
    XML_REGEXP_SEPAR,
    XML_REGEXP_SEPAR_SPACE,
    XML_REGEXP_SEPAR_LINE,
    XML_REGEXP_SEPAR_PARA,
    XML_REGEXP_SYMBOL,
    XML_REGEXP_SYMBOL_MATH,
    XML_REGEXP_SYMBOL_CURRENCY,
    XML_REGEXP_SYMBOL_MODIFIER,
    XML_REGEXP_SYMBOL_OTHERS,
    XML_REGEXP_OTHER,
    XML_REGEXP_OTHER_CONTROL,
    XML_REGEXP_OTHER_FORMAT,
    XML_REGEXP_OTHER_PRIVATE,
    XML_REGEXP_OTHER_NA,
    XML_REGEXP_BLOCK_NAME
} xmlRegAtomType;
/*
 * Quantifier attached to an atom (see the `quant` field of
 * struct _xmlRegAtom). Values start at 1 and follow declaration order.
 */
typedef enum {
    XML_REGEXP_QUANT_EPSILON = 1,
    XML_REGEXP_QUANT_ONCE,      /* default set by xmlRegNewAtom() */
    XML_REGEXP_QUANT_OPT,       /* printed as "?" */
    XML_REGEXP_QUANT_MULT,      /* printed as "*" */
    XML_REGEXP_QUANT_PLUS,      /* printed as "+" */
    XML_REGEXP_QUANT_ONCEONLY,
    XML_REGEXP_QUANT_ALL,
    XML_REGEXP_QUANT_RANGE      /* printed with the atom's min-max values */
} xmlRegQuantType;
/*
 * Type of an automata state (see the `type` field of
 * struct _xmlAutomataState). Values start at 1 and follow declaration order.
 */
typedef enum {
    XML_REGEXP_START_STATE = 1,
    XML_REGEXP_FINAL_STATE,
    XML_REGEXP_TRANS_STATE,     /* default set by xmlRegNewState() */
    XML_REGEXP_SINK_STATE,
    XML_REGEXP_UNREACH_STATE
} xmlRegStateType;
/*
 * Marker values stored in the mark, markd and reached fields of
 * struct _xmlAutomataState.
 */
typedef enum {
/* value given to a freshly allocated state by xmlRegNewState() */
XML_REGEXP_MARK_NORMAL = 0 ,
XML_REGEXP_MARK_START ,
XML_REGEXP_MARK_VISITED
} xmlRegMarkedType ;
typedef struct _xmlRegRange xmlRegRange ;
typedef xmlRegRange * xmlRegRangePtr ;
struct _xmlRegRange {
2003-11-24 17:27:26 +03:00
int neg ; /* 0 normal, 1 not, 2 exclude */
2002-04-16 19:50:10 +04:00
xmlRegAtomType type ;
int start ;
int end ;
xmlChar * blockName ;
} ;
typedef struct _xmlRegAtom xmlRegAtom ;
typedef xmlRegAtom * xmlRegAtomPtr ;
typedef struct _xmlAutomataState xmlRegState ;
typedef xmlRegState * xmlRegStatePtr ;
struct _xmlRegAtom {
int no ;
xmlRegAtomType type ;
xmlRegQuantType quant ;
int min ;
int max ;
void * valuep ;
2002-09-18 01:50:03 +04:00
void * valuep2 ;
2002-04-16 19:50:10 +04:00
int neg ;
int codepoint ;
xmlRegStatePtr start ;
2007-08-22 20:29:21 +04:00
xmlRegStatePtr start0 ;
2002-04-16 19:50:10 +04:00
xmlRegStatePtr stop ;
int maxRanges ;
int nbRanges ;
xmlRegRangePtr * ranges ;
void * data ;
} ;
typedef struct _xmlRegCounter xmlRegCounter ;
typedef xmlRegCounter * xmlRegCounterPtr ;
/*
 * One counter; arrays of these are held in the `counters` field of
 * struct _xmlAutomata and struct _xmlRegexp.
 * NOTE(review): min/max are presumably the allowed bounds of the count -
 * confirm against the code using them later in the file.
 */
struct _xmlRegCounter {
int min ;
int max ;
} ;
typedef struct _xmlRegTrans xmlRegTrans;
typedef xmlRegTrans *xmlRegTransPtr;

/*
 * One transition leaving a state.
 */
struct _xmlRegTrans {
    xmlRegAtomPtr atom; /* may be NULL; such transitions are skipped when
                           building the compact form */
    int to;             /* index of the target state; negative values are
                           skipped when building the compact form */
    int counter;
    int count;
    int nd;
};
struct _xmlAutomataState {
xmlRegStateType type ;
xmlRegMarkedType mark ;
2012-08-27 08:03:40 +04:00
xmlRegMarkedType markd ;
2002-09-19 23:56:43 +04:00
xmlRegMarkedType reached ;
2002-04-16 19:50:10 +04:00
int no ;
int maxTrans ;
int nbTrans ;
xmlRegTrans * trans ;
2019-09-30 18:04:54 +03:00
/* knowing states pointing to us can speed things up */
2005-07-30 17:18:24 +04:00
int maxTransTo ;
int nbTransTo ;
int * transTo ;
2002-04-16 19:50:10 +04:00
} ;
typedef struct _xmlAutomata xmlRegParserCtxt ;
typedef xmlRegParserCtxt * xmlRegParserCtxtPtr ;
2009-08-31 18:47:39 +04:00
# define AM_AUTOMATA_RNG 1
2002-04-16 19:50:10 +04:00
struct _xmlAutomata {
xmlChar * string ;
xmlChar * cur ;
int error ;
int neg ;
xmlRegStatePtr start ;
xmlRegStatePtr end ;
xmlRegStatePtr state ;
xmlRegAtomPtr atom ;
int maxAtoms ;
int nbAtoms ;
xmlRegAtomPtr * atoms ;
int maxStates ;
int nbStates ;
xmlRegStatePtr * states ;
int maxCounters ;
int nbCounters ;
xmlRegCounter * counters ;
2002-04-22 20:01:24 +04:00
int determinist ;
2005-08-09 15:09:52 +04:00
int negs ;
2009-08-31 18:47:39 +04:00
int flags ;
2020-07-06 16:22:12 +03:00
int depth ;
2002-04-16 19:50:10 +04:00
} ;
/*
 * A compiled regular expression, filled in by xmlRegEpxFromParse().
 */
struct _xmlRegexp {
    xmlChar *string;
    int nbStates;
    xmlRegStatePtr *states;
    int nbAtoms;
    xmlRegAtomPtr *atoms;
    int nbCounters;
    xmlRegCounter *counters;
    int determinist;
    int flags;
    /*
     * That's the compact form for determinists automatas
     */
    int nbstates;           /* number of states kept in the compact form */
    int *compact;           /* (nbstates + 1) x (nbstrings + 1) table: the
                               first entry of a row is the state type, the
                               others are target state + 1, 0 meaning none */
    void **transdata;       /* nbstates x nbstrings table of atom data, or
                               NULL when no atom carries data */
    int nbstrings;          /* number of entries in stringMap */
    xmlChar **stringMap;    /* the distinct atom strings */
};
typedef struct _xmlRegExecRollback xmlRegExecRollback;
typedef xmlRegExecRollback *xmlRegExecRollbackPtr;

/*
 * One entry of the stack of rollback states kept by the execution context.
 */
struct _xmlRegExecRollback {
    xmlRegStatePtr state;   /* the current state */
    int index;              /* the index in the input stack */
    int nextbranch;         /* the next transition to explore in that state */
    int *counts;            /* save the automata state if it has some */
};
typedef struct _xmlRegInputToken xmlRegInputToken ;
typedef xmlRegInputToken * xmlRegInputTokenPtr ;
/*
 * One entry of the input stack used by the execution context when
 * operating on strings (see the inputStack field of struct _xmlRegExecCtxt).
 */
struct _xmlRegInputToken {
xmlChar * value ;
void * data ;
} ;
struct _xmlRegExecCtxt {
int status ; /* execution status != 0 indicate an error */
2004-05-06 08:17:26 +04:00
int determinist ; /* did we find an indeterministic behaviour */
2002-04-16 19:50:10 +04:00
xmlRegexpPtr comp ; /* the compiled regexp */
xmlRegExecCallbacks callback ;
void * data ;
xmlRegStatePtr state ; /* the current state */
int transno ; /* the current transition on that state */
2004-05-06 08:17:26 +04:00
int transcount ; /* the number of chars in char counted transitions */
2002-04-16 19:50:10 +04:00
/*
* A stack of rollback states
*/
int maxRollbacks ;
int nbRollbacks ;
xmlRegExecRollback * rollbacks ;
/*
* The state of the automata if any
*/
int * counts ;
/*
* The input stack
*/
int inputStackMax ;
int inputStackNr ;
int index ;
int * charStack ;
const xmlChar * inputString ; /* when operating on characters */
xmlRegInputTokenPtr inputStack ; /* when operating on strings */
2005-01-07 16:56:19 +03:00
/*
* error handling
*/
int errStateNo ; /* the error state number */
xmlRegStatePtr errState ; /* the error state */
xmlChar * errString ; /* the string raising the error */
int * errCounts ; /* counters at the error state */
2005-09-15 17:09:00 +04:00
int nbPush ;
2002-04-16 19:50:10 +04:00
} ;
2002-04-20 21:38:48 +04:00
/* NOTE(review): presumably reserved counter values that cannot collide with
 * real counter indexes - confirm against their users later in the file. */
# define REGEXP_ALL_COUNTER 0x123456
# define REGEXP_ALL_LAX_COUNTER 0x123457
2002-04-20 10:41:40 +04:00
2002-04-16 19:50:10 +04:00
static void xmlFAParseRegExp ( xmlRegParserCtxtPtr ctxt , int top ) ;
2002-09-19 23:56:43 +04:00
static void xmlRegFreeState ( xmlRegStatePtr state ) ;
static void xmlRegFreeAtom ( xmlRegAtomPtr atom ) ;
2005-07-19 18:33:55 +04:00
static int xmlRegStrEqualWildcard ( const xmlChar * expStr , const xmlChar * valStr ) ;
2005-10-18 23:11:55 +04:00
static int xmlRegCheckCharacter ( xmlRegAtomPtr atom , int codepoint ) ;
static int xmlRegCheckCharacterRange ( xmlRegAtomType type , int codepoint ,
int neg , int start , int end , const xmlChar * blockName ) ;
2002-04-16 19:50:10 +04:00
2003-10-08 12:53:17 +04:00
/************************************************************************
* *
2012-09-11 09:26:36 +04:00
* Regexp memory error handler *
2003-10-08 12:53:17 +04:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* xmlRegexpErrMemory :
2004-05-06 08:17:26 +04:00
* @ extra : extra information
2003-10-08 12:53:17 +04:00
*
* Handle an out of memory condition
*/
static void
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( xmlRegParserCtxtPtr ctxt )
2003-10-08 12:53:17 +04:00
{
2023-12-18 23:39:51 +03:00
if ( ctxt ! = NULL )
ctxt - > error = XML_ERR_NO_MEMORY ;
xmlRaiseMemoryError ( NULL , NULL , NULL , XML_FROM_REGEXP , NULL ) ;
2003-10-08 12:53:17 +04:00
}
/**
 * xmlRegexpErrCompile:
 * @ctxt:  the regexp parser context, may be NULL
 * @extra:  extra information
 *
 * Handle a compilation failure. When a context is given it is flagged with
 * XML_REGEXP_COMPILE_ERROR, and the expression plus the offset of the
 * cursor inside it are passed along with the error. A negative result from
 * the error raising routine is reported as an out of memory condition.
 */
static void
xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
{
    const char *pattern = NULL;
    int offset = 0;
    int status;

    if (ctxt != NULL) {
        ctxt->error = XML_REGEXP_COMPILE_ERROR;
        offset = ctxt->cur - ctxt->string;
        pattern = (const char *) ctxt->string;
    }

    status = __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
                             XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL,
                             NULL, 0, extra, pattern, NULL, offset, 0,
                             " failed to compile: %s \n ", extra);
    if (status < 0) {
        xmlRegexpErrMemory(ctxt);
    }
}
2002-04-16 19:50:10 +04:00
/************************************************************************
2012-09-11 09:26:36 +04:00
* *
* Allocation / Deallocation *
* *
2002-04-16 19:50:10 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2002-09-19 23:56:43 +04:00
/* Forward declaration; the definition is not part of this section. */
static int xmlFAComputesDeterminism ( xmlRegParserCtxtPtr ctxt ) ;
2020-01-02 16:45:28 +03:00
/**
 * xmlRegCalloc2:
 * @dim1:  size of first dimension
 * @dim2:  size of second dimension
 * @elemSize:  size of element
 *
 * Allocate a two-dimensional array and set all elements to zero.
 *
 * Returns the new array, or NULL when @dim2 or @elemSize is zero, when
 * the total size does not fit in a size_t, or when the allocation fails.
 */
static void *
xmlRegCalloc2(size_t dim1, size_t dim2, size_t elemSize) {
    size_t nbytes;
    void *block;

    /* a zero divisor would make the overflow check below invalid */
    if (dim2 == 0)
        return(NULL);
    if (elemSize == 0)
        return(NULL);

    /* refuse products which cannot be represented */
    if (dim1 > SIZE_MAX / dim2 / elemSize)
        return(NULL);

    nbytes = dim1 * dim2 * elemSize;
    block = xmlMalloc(nbytes);
    if (block == NULL)
        return(NULL);

    memset(block, 0, nbytes);
    return(block);
}
2002-04-16 19:50:10 +04:00
/**
* xmlRegEpxFromParse :
* @ ctxt : the parser context used to build it
*
2004-05-06 08:17:26 +04:00
* Allocate a new regexp and fill it with the result from the parser
2002-04-16 19:50:10 +04:00
*
* Returns the new regexp or NULL in case of error
*/
static xmlRegexpPtr
xmlRegEpxFromParse ( xmlRegParserCtxtPtr ctxt ) {
xmlRegexpPtr ret ;
ret = ( xmlRegexpPtr ) xmlMalloc ( sizeof ( xmlRegexp ) ) ;
2003-04-24 20:06:47 +04:00
if ( ret = = NULL ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2002-04-16 19:50:10 +04:00
return ( NULL ) ;
2003-04-24 20:06:47 +04:00
}
2002-04-16 19:50:10 +04:00
memset ( ret , 0 , sizeof ( xmlRegexp ) ) ;
ret - > string = ctxt - > string ;
ret - > nbStates = ctxt - > nbStates ;
ret - > states = ctxt - > states ;
ret - > nbAtoms = ctxt - > nbAtoms ;
ret - > atoms = ctxt - > atoms ;
ret - > nbCounters = ctxt - > nbCounters ;
ret - > counters = ctxt - > counters ;
2002-04-22 20:01:24 +04:00
ret - > determinist = ctxt - > determinist ;
2009-08-31 18:47:39 +04:00
ret - > flags = ctxt - > flags ;
2005-10-18 23:11:55 +04:00
if ( ret - > determinist = = - 1 ) {
2023-09-22 18:03:56 +03:00
if ( xmlRegexpIsDeterminist ( ret ) < 0 ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2023-09-22 18:03:56 +03:00
xmlFree ( ret ) ;
return ( NULL ) ;
}
2005-10-18 23:11:55 +04:00
}
2002-09-19 23:56:43 +04:00
if ( ( ret - > determinist ! = 0 ) & &
( ret - > nbCounters = = 0 ) & &
2005-08-09 15:09:52 +04:00
( ctxt - > negs = = 0 ) & &
2002-09-24 18:13:13 +04:00
( ret - > atoms ! = NULL ) & &
2002-09-19 23:56:43 +04:00
( ret - > atoms [ 0 ] ! = NULL ) & &
( ret - > atoms [ 0 ] - > type = = XML_REGEXP_STRING ) ) {
int i , j , nbstates = 0 , nbatoms = 0 ;
int * stateRemap ;
int * stringRemap ;
int * transitions ;
2002-09-24 18:13:13 +04:00
void * * transdata ;
2002-09-19 23:56:43 +04:00
xmlChar * * stringMap ;
xmlChar * value ;
/*
* Switch to a compact representation
* 1 / counting the effective number of states left
2004-05-06 08:17:26 +04:00
* 2 / counting the unique number of atoms , and check that
2002-09-19 23:56:43 +04:00
* they are all of the string type
* 3 / build a table state x atom for the transitions
*/
stateRemap = xmlMalloc ( ret - > nbStates * sizeof ( int ) ) ;
2003-04-24 20:06:47 +04:00
if ( stateRemap = = NULL ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2003-04-24 20:06:47 +04:00
xmlFree ( ret ) ;
return ( NULL ) ;
}
2002-09-19 23:56:43 +04:00
for ( i = 0 ; i < ret - > nbStates ; i + + ) {
if ( ret - > states [ i ] ! = NULL ) {
stateRemap [ i ] = nbstates ;
nbstates + + ;
} else {
stateRemap [ i ] = - 1 ;
}
}
stringMap = xmlMalloc ( ret - > nbAtoms * sizeof ( char * ) ) ;
2003-04-24 20:06:47 +04:00
if ( stringMap = = NULL ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2003-04-24 20:06:47 +04:00
xmlFree ( stateRemap ) ;
xmlFree ( ret ) ;
return ( NULL ) ;
}
2002-09-19 23:56:43 +04:00
stringRemap = xmlMalloc ( ret - > nbAtoms * sizeof ( int ) ) ;
2003-04-24 20:06:47 +04:00
if ( stringRemap = = NULL ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2003-04-24 20:06:47 +04:00
xmlFree ( stringMap ) ;
xmlFree ( stateRemap ) ;
xmlFree ( ret ) ;
return ( NULL ) ;
}
2002-09-19 23:56:43 +04:00
for ( i = 0 ; i < ret - > nbAtoms ; i + + ) {
if ( ( ret - > atoms [ i ] - > type = = XML_REGEXP_STRING ) & &
( ret - > atoms [ i ] - > quant = = XML_REGEXP_QUANT_ONCE ) ) {
value = ret - > atoms [ i ] - > valuep ;
for ( j = 0 ; j < nbatoms ; j + + ) {
if ( xmlStrEqual ( stringMap [ j ] , value ) ) {
stringRemap [ i ] = j ;
break ;
}
}
if ( j > = nbatoms ) {
stringRemap [ i ] = nbatoms ;
stringMap [ nbatoms ] = xmlStrdup ( value ) ;
2003-04-24 20:06:47 +04:00
if ( stringMap [ nbatoms ] = = NULL ) {
for ( i = 0 ; i < nbatoms ; i + + )
xmlFree ( stringMap [ i ] ) ;
xmlFree ( stringRemap ) ;
xmlFree ( stringMap ) ;
xmlFree ( stateRemap ) ;
xmlFree ( ret ) ;
return ( NULL ) ;
}
2002-09-19 23:56:43 +04:00
nbatoms + + ;
}
} else {
xmlFree ( stateRemap ) ;
xmlFree ( stringRemap ) ;
for ( i = 0 ; i < nbatoms ; i + + )
xmlFree ( stringMap [ i ] ) ;
xmlFree ( stringMap ) ;
2003-04-24 20:06:47 +04:00
xmlFree ( ret ) ;
return ( NULL ) ;
2002-09-19 23:56:43 +04:00
}
}
2020-01-02 16:45:28 +03:00
transitions = ( int * ) xmlRegCalloc2 ( nbstates + 1 , nbatoms + 1 ,
sizeof ( int ) ) ;
2003-04-24 20:06:47 +04:00
if ( transitions = = NULL ) {
xmlFree ( stateRemap ) ;
xmlFree ( stringRemap ) ;
2019-08-26 10:24:12 +03:00
for ( i = 0 ; i < nbatoms ; i + + )
xmlFree ( stringMap [ i ] ) ;
2003-04-24 20:06:47 +04:00
xmlFree ( stringMap ) ;
xmlFree ( ret ) ;
return ( NULL ) ;
}
2002-09-19 23:56:43 +04:00
/*
* Allocate the transition table . The first entry for each
2004-05-06 08:17:26 +04:00
* state corresponds to the state type .
2002-09-19 23:56:43 +04:00
*/
2002-09-24 18:13:13 +04:00
transdata = NULL ;
2002-09-19 23:56:43 +04:00
for ( i = 0 ; i < ret - > nbStates ; i + + ) {
int stateno , atomno , targetno , prev ;
xmlRegStatePtr state ;
xmlRegTransPtr trans ;
stateno = stateRemap [ i ] ;
if ( stateno = = - 1 )
continue ;
state = ret - > states [ i ] ;
transitions [ stateno * ( nbatoms + 1 ) ] = state - > type ;
for ( j = 0 ; j < state - > nbTrans ; j + + ) {
trans = & ( state - > trans [ j ] ) ;
2023-03-09 07:34:38 +03:00
if ( ( trans - > to < 0 ) | | ( trans - > atom = = NULL ) )
2002-09-19 23:56:43 +04:00
continue ;
atomno = stringRemap [ trans - > atom - > no ] ;
2002-09-24 18:13:13 +04:00
if ( ( trans - > atom - > data ! = NULL ) & & ( transdata = = NULL ) ) {
2020-01-02 16:45:28 +03:00
transdata = ( void * * ) xmlRegCalloc2 ( nbstates , nbatoms ,
sizeof ( void * ) ) ;
if ( transdata = = NULL ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2003-04-24 20:06:47 +04:00
break ;
}
2002-09-24 18:13:13 +04:00
}
2002-09-19 23:56:43 +04:00
targetno = stateRemap [ trans - > to ] ;
/*
2004-05-06 08:17:26 +04:00
* if the same atom can generate transitions to 2 different
2019-09-30 18:04:54 +03:00
* states then it means the automata is not deterministic and
2002-09-19 23:56:43 +04:00
* the compact form can ' t be used !
*/
prev = transitions [ stateno * ( nbatoms + 1 ) + atomno + 1 ] ;
if ( prev ! = 0 ) {
if ( prev ! = targetno + 1 ) {
ret - > determinist = 0 ;
2002-09-24 18:13:13 +04:00
if ( transdata ! = NULL )
xmlFree ( transdata ) ;
2002-09-19 23:56:43 +04:00
xmlFree ( transitions ) ;
xmlFree ( stateRemap ) ;
xmlFree ( stringRemap ) ;
for ( i = 0 ; i < nbatoms ; i + + )
xmlFree ( stringMap [ i ] ) ;
xmlFree ( stringMap ) ;
2003-04-24 20:06:47 +04:00
goto not_determ ;
2002-09-19 23:56:43 +04:00
}
} else {
#if 0
printf ( " State %d trans %d: atom %d to %d : %d to %d \n " ,
i , j , trans - > atom - > no , trans - > to , atomno , targetno ) ;
# endif
transitions [ stateno * ( nbatoms + 1 ) + atomno + 1 ] =
2002-09-24 18:13:13 +04:00
targetno + 1 ; /* to avoid 0 */
if ( transdata ! = NULL )
transdata [ stateno * nbatoms + atomno ] =
trans - > atom - > data ;
2002-09-19 23:56:43 +04:00
}
}
}
ret - > determinist = 1 ;
/*
* Cleanup of the old data
*/
if ( ret - > states ! = NULL ) {
for ( i = 0 ; i < ret - > nbStates ; i + + )
xmlRegFreeState ( ret - > states [ i ] ) ;
xmlFree ( ret - > states ) ;
}
ret - > states = NULL ;
ret - > nbStates = 0 ;
if ( ret - > atoms ! = NULL ) {
for ( i = 0 ; i < ret - > nbAtoms ; i + + )
xmlRegFreeAtom ( ret - > atoms [ i ] ) ;
xmlFree ( ret - > atoms ) ;
}
ret - > atoms = NULL ;
ret - > nbAtoms = 0 ;
ret - > compact = transitions ;
2002-09-24 18:13:13 +04:00
ret - > transdata = transdata ;
2002-09-19 23:56:43 +04:00
ret - > stringMap = stringMap ;
ret - > nbstrings = nbatoms ;
ret - > nbstates = nbstates ;
xmlFree ( stateRemap ) ;
xmlFree ( stringRemap ) ;
}
2003-04-24 20:06:47 +04:00
not_determ :
ctxt - > string = NULL ;
ctxt - > nbStates = 0 ;
ctxt - > states = NULL ;
ctxt - > nbAtoms = 0 ;
ctxt - > atoms = NULL ;
ctxt - > nbCounters = 0 ;
ctxt - > counters = NULL ;
2002-04-16 19:50:10 +04:00
return ( ret ) ;
}
/**
* xmlRegNewParserCtxt :
* @ string : the string to parse
*
* Allocate a new regexp parser context
*
* Returns the new context or NULL in case of error
*/
static xmlRegParserCtxtPtr
xmlRegNewParserCtxt ( const xmlChar * string ) {
xmlRegParserCtxtPtr ret ;
ret = ( xmlRegParserCtxtPtr ) xmlMalloc ( sizeof ( xmlRegParserCtxt ) ) ;
if ( ret = = NULL )
return ( NULL ) ;
memset ( ret , 0 , sizeof ( xmlRegParserCtxt ) ) ;
2023-12-10 19:09:45 +03:00
if ( string ! = NULL ) {
2002-04-16 19:50:10 +04:00
ret - > string = xmlStrdup ( string ) ;
2023-12-10 19:09:45 +03:00
if ( ret - > string = = NULL ) {
xmlFree ( ret ) ;
return ( NULL ) ;
}
}
2002-04-16 19:50:10 +04:00
ret - > cur = ret - > string ;
ret - > neg = 0 ;
2005-08-09 15:09:52 +04:00
ret - > negs = 0 ;
2002-04-16 19:50:10 +04:00
ret - > error = 0 ;
2002-04-22 20:01:24 +04:00
ret - > determinist = - 1 ;
2002-04-16 19:50:10 +04:00
return ( ret ) ;
}
/**
 * xmlRegNewRange:
 * @ctxt:  the regexp parser context
 * @neg:  is that negative
 * @type:  the type of range
 * @start:  the start codepoint
 * @end:  the end codepoint
 *
 * Allocate a new regexp range. The block name of the new range is NULL;
 * callers which need one set it afterwards.
 *
 * Returns the new range or NULL in case of error
 */
static xmlRegRangePtr
xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
               int neg, xmlRegAtomType type, int start, int end) {
    xmlRegRangePtr ret;

    ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
    if (ret == NULL) {
        xmlRegexpErrMemory(ctxt);
        return(NULL);
    }
    ret->neg = neg;
    ret->type = type;
    ret->start = start;
    ret->end = end;
    /*
     * The memory comes from xmlMalloc() and is not zeroed. Without this
     * the pointer is garbage when the caller does not set it, and
     * xmlRegFreeRange() would pass that garbage to xmlFree().
     */
    ret->blockName = NULL;
    return(ret);
}
/**
 * xmlRegFreeRange:
 * @range:  the regexp range, may be NULL
 *
 * Free a regexp range together with its block name.
 */
static void
xmlRegFreeRange(xmlRegRangePtr range) {
    if (range != NULL) {
        if (range->blockName != NULL) {
            xmlFree(range->blockName);
        }
        xmlFree(range);
    }
}
2007-08-22 20:29:21 +04:00
/**
 * xmlRegCopyRange:
 * @ctxt:  the regexp parser context
 * @range:  the regexp range
 *
 * Copy a regexp range, duplicating its block name if it has one.
 *
 * Returns the new copy or NULL in case of error.
 */
static xmlRegRangePtr
xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
    xmlRegRangePtr copy;

    if (range == NULL)
        return(NULL);

    copy = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
                          range->end);
    if (copy == NULL)
        return(NULL);

    if (range->blockName == NULL)
        return(copy);

    copy->blockName = xmlStrdup(range->blockName);
    if (copy->blockName != NULL)
        return(copy);

    /* the duplication of the name failed */
    xmlRegexpErrMemory(ctxt);
    xmlRegFreeRange(copy);
    return(NULL);
}
2002-04-16 19:50:10 +04:00
/**
 * xmlRegNewAtom:
 * @ctxt:  the regexp parser context
 * @type:  the type of atom
 *
 * Allocate a new atom. All its fields are zeroed except the type, and
 * the quantifier which is XML_REGEXP_QUANT_ONCE.
 *
 * Returns the new atom or NULL in case of error
 */
static xmlRegAtomPtr
xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
    xmlRegAtomPtr atom;

    atom = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
    if (atom == NULL) {
        xmlRegexpErrMemory(ctxt);
        return(NULL);
    }

    memset(atom, 0, sizeof(xmlRegAtom));
    atom->min = 0;
    atom->max = 0;
    atom->quant = XML_REGEXP_QUANT_ONCE;
    atom->type = type;
    return(atom);
}
/**
 * xmlRegFreeAtom:
 * @atom:  the regexp atom, may be NULL
 *
 * Free a regexp atom: its ranges, and the values it owns. Both values are
 * owned by string atoms, the first value only by block name atoms.
 */
static void
xmlRegFreeAtom(xmlRegAtomPtr atom) {
    int idx;

    if (atom == NULL)
        return;

    for (idx = 0; idx < atom->nbRanges; idx++) {
        xmlRegFreeRange(atom->ranges[idx]);
    }
    if (atom->ranges != NULL) {
        xmlFree(atom->ranges);
    }

    switch (atom->type) {
        case XML_REGEXP_STRING:
            if (atom->valuep != NULL)
                xmlFree(atom->valuep);
            if (atom->valuep2 != NULL)
                xmlFree(atom->valuep2);
            break;
        case XML_REGEXP_BLOCK_NAME:
            if (atom->valuep != NULL)
                xmlFree(atom->valuep);
            break;
        default:
            /* the values of the other atom types are not owned */
            break;
    }

    xmlFree(atom);
}
2007-08-22 20:29:21 +04:00
/**
 * xmlRegCopyAtom:
 * @ctxt:  the regexp parser context
 * @atom:  the original atom
 *
 * Allocate a new atom and copy the type, quantifier, min, max and ranges
 * of the original into it. All the other fields of the copy are zeroed.
 *
 * Returns the new atom or NULL in case of error
 */
static xmlRegAtomPtr
xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
    xmlRegAtomPtr copy;
    int idx;

    copy = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
    if (copy == NULL) {
        xmlRegexpErrMemory(ctxt);
        return(NULL);
    }
    memset(copy, 0, sizeof(xmlRegAtom));

    copy->type = atom->type;
    copy->quant = atom->quant;
    copy->min = atom->min;
    copy->max = atom->max;

    if (atom->nbRanges <= 0)
        return(copy);

    copy->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
                                                atom->nbRanges);
    if (copy->ranges == NULL) {
        xmlRegexpErrMemory(ctxt);
        xmlRegFreeAtom(copy);
        return(NULL);
    }

    for (idx = 0; idx < atom->nbRanges; idx++) {
        copy->ranges[idx] = xmlRegCopyRange(ctxt, atom->ranges[idx]);
        if (copy->ranges[idx] == NULL) {
            /* nbRanges only counts the ranges copied so far */
            xmlRegFreeAtom(copy);
            return(NULL);
        }
        copy->nbRanges = idx + 1;
    }
    return(copy);
}
2002-04-16 19:50:10 +04:00
/**
 * xmlRegNewState:
 * @ctxt:  the regexp parser context
 *
 * Allocate a new regexp state. All its fields are zeroed except the type
 * which is XML_REGEXP_TRANS_STATE; the mark is XML_REGEXP_MARK_NORMAL.
 *
 * Returns the new state or NULL in case of error
 */
static xmlRegStatePtr
xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
    xmlRegStatePtr state;

    state = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
    if (state == NULL) {
        xmlRegexpErrMemory(ctxt);
        return(NULL);
    }

    memset(state, 0, sizeof(xmlRegState));
    state->mark = XML_REGEXP_MARK_NORMAL;
    state->type = XML_REGEXP_TRANS_STATE;
    return(state);
}
/**
 * xmlRegFreeState:
 * @state:  the regexp state, may be NULL
 *
 * Free a regexp state together with its two transition tables.
 */
static void
xmlRegFreeState(xmlRegStatePtr state) {
    if (state != NULL) {
        if (state->trans != NULL) {
            xmlFree(state->trans);
        }
        if (state->transTo != NULL) {
            xmlFree(state->transTo);
        }
        xmlFree(state);
    }
}
/**
 * xmlRegFreeParserCtxt:
 * @ctxt:  the regexp parser context, may be NULL
 *
 * Free a regexp parser context together with the string, states, atoms
 * and counters it still holds.
 */
static void
xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
    int idx;

    if (ctxt == NULL)
        return;

    if (ctxt->string != NULL) {
        xmlFree(ctxt->string);
    }

    if (ctxt->states != NULL) {
        idx = 0;
        while (idx < ctxt->nbStates) {
            xmlRegFreeState(ctxt->states[idx]);
            idx++;
        }
        xmlFree(ctxt->states);
    }

    if (ctxt->atoms != NULL) {
        idx = 0;
        while (idx < ctxt->nbAtoms) {
            xmlRegFreeAtom(ctxt->atoms[idx]);
            idx++;
        }
        xmlFree(ctxt->atoms);
    }

    if (ctxt->counters != NULL) {
        xmlFree(ctxt->counters);
    }

    xmlFree(ctxt);
}
/************************************************************************
2012-09-11 09:26:36 +04:00
* *
* Display of Data structures *
* *
2002-04-16 19:50:10 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * xmlRegPrintAtomType:
 * @output:  the stream to write to
 * @type:  the atom type
 *
 * Write the label of an atom type to @output. Nothing is written for a
 * value which is not one of the enumerators.
 */
static void
xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
    const char *label = NULL;

    switch (type) {
        case XML_REGEXP_EPSILON:
            label = " epsilon ";
            break;
        case XML_REGEXP_CHARVAL:
            label = " charval ";
            break;
        case XML_REGEXP_RANGES:
            label = " ranges ";
            break;
        case XML_REGEXP_SUBREG:
            label = " subexpr ";
            break;
        case XML_REGEXP_STRING:
            label = " string ";
            break;
        case XML_REGEXP_ANYCHAR:
            label = " anychar ";
            break;
        case XML_REGEXP_ANYSPACE:
            label = " anyspace ";
            break;
        case XML_REGEXP_NOTSPACE:
            label = " notspace ";
            break;
        case XML_REGEXP_INITNAME:
            label = " initname ";
            break;
        case XML_REGEXP_NOTINITNAME:
            label = " notinitname ";
            break;
        case XML_REGEXP_NAMECHAR:
            label = " namechar ";
            break;
        case XML_REGEXP_NOTNAMECHAR:
            label = " notnamechar ";
            break;
        case XML_REGEXP_DECIMAL:
            label = " decimal ";
            break;
        case XML_REGEXP_NOTDECIMAL:
            label = " notdecimal ";
            break;
        case XML_REGEXP_REALCHAR:
            label = " realchar ";
            break;
        case XML_REGEXP_NOTREALCHAR:
            label = " notrealchar ";
            break;
        case XML_REGEXP_LETTER:
            label = " LETTER ";
            break;
        case XML_REGEXP_LETTER_UPPERCASE:
            label = " LETTER_UPPERCASE ";
            break;
        case XML_REGEXP_LETTER_LOWERCASE:
            label = " LETTER_LOWERCASE ";
            break;
        case XML_REGEXP_LETTER_TITLECASE:
            label = " LETTER_TITLECASE ";
            break;
        case XML_REGEXP_LETTER_MODIFIER:
            label = " LETTER_MODIFIER ";
            break;
        case XML_REGEXP_LETTER_OTHERS:
            label = " LETTER_OTHERS ";
            break;
        case XML_REGEXP_MARK:
            label = " MARK ";
            break;
        case XML_REGEXP_MARK_NONSPACING:
            label = " MARK_NONSPACING ";
            break;
        case XML_REGEXP_MARK_SPACECOMBINING:
            label = " MARK_SPACECOMBINING ";
            break;
        case XML_REGEXP_MARK_ENCLOSING:
            label = " MARK_ENCLOSING ";
            break;
        case XML_REGEXP_NUMBER:
            label = " NUMBER ";
            break;
        case XML_REGEXP_NUMBER_DECIMAL:
            label = " NUMBER_DECIMAL ";
            break;
        case XML_REGEXP_NUMBER_LETTER:
            label = " NUMBER_LETTER ";
            break;
        case XML_REGEXP_NUMBER_OTHERS:
            label = " NUMBER_OTHERS ";
            break;
        case XML_REGEXP_PUNCT:
            label = " PUNCT ";
            break;
        case XML_REGEXP_PUNCT_CONNECTOR:
            label = " PUNCT_CONNECTOR ";
            break;
        case XML_REGEXP_PUNCT_DASH:
            label = " PUNCT_DASH ";
            break;
        case XML_REGEXP_PUNCT_OPEN:
            label = " PUNCT_OPEN ";
            break;
        case XML_REGEXP_PUNCT_CLOSE:
            label = " PUNCT_CLOSE ";
            break;
        case XML_REGEXP_PUNCT_INITQUOTE:
            label = " PUNCT_INITQUOTE ";
            break;
        case XML_REGEXP_PUNCT_FINQUOTE:
            label = " PUNCT_FINQUOTE ";
            break;
        case XML_REGEXP_PUNCT_OTHERS:
            label = " PUNCT_OTHERS ";
            break;
        case XML_REGEXP_SEPAR:
            label = " SEPAR ";
            break;
        case XML_REGEXP_SEPAR_SPACE:
            label = " SEPAR_SPACE ";
            break;
        case XML_REGEXP_SEPAR_LINE:
            label = " SEPAR_LINE ";
            break;
        case XML_REGEXP_SEPAR_PARA:
            label = " SEPAR_PARA ";
            break;
        case XML_REGEXP_SYMBOL:
            label = " SYMBOL ";
            break;
        case XML_REGEXP_SYMBOL_MATH:
            label = " SYMBOL_MATH ";
            break;
        case XML_REGEXP_SYMBOL_CURRENCY:
            label = " SYMBOL_CURRENCY ";
            break;
        case XML_REGEXP_SYMBOL_MODIFIER:
            label = " SYMBOL_MODIFIER ";
            break;
        case XML_REGEXP_SYMBOL_OTHERS:
            label = " SYMBOL_OTHERS ";
            break;
        case XML_REGEXP_OTHER:
            label = " OTHER ";
            break;
        case XML_REGEXP_OTHER_CONTROL:
            label = " OTHER_CONTROL ";
            break;
        case XML_REGEXP_OTHER_FORMAT:
            label = " OTHER_FORMAT ";
            break;
        case XML_REGEXP_OTHER_PRIVATE:
            label = " OTHER_PRIVATE ";
            break;
        case XML_REGEXP_OTHER_NA:
            label = " OTHER_NA ";
            break;
        case XML_REGEXP_BLOCK_NAME:
            label = " BLOCK ";
            break;
    }

    if (label != NULL)
        fputs(label, output);
}
/**
 * xmlRegPrintQuantType:
 * @output:  the stream receiving the dump
 * @type:  the quantifier to describe
 *
 * Writes the textual label of a quantifier to @output. A value that is
 * not one of the cases below produces no output at all.
 */
static void
xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
    const char *label = NULL;

    switch (type) {
        case XML_REGEXP_QUANT_EPSILON:
            label = " epsilon ";
            break;
        case XML_REGEXP_QUANT_ONCE:
            label = " once ";
            break;
        case XML_REGEXP_QUANT_OPT:
            label = " ? ";
            break;
        case XML_REGEXP_QUANT_MULT:
            label = " * ";
            break;
        case XML_REGEXP_QUANT_PLUS:
            label = " + ";
            break;
        case XML_REGEXP_QUANT_RANGE:
            label = " range ";
            break;
        case XML_REGEXP_QUANT_ONCEONLY:
            label = " onceonly ";
            break;
        case XML_REGEXP_QUANT_ALL:
            label = " all ";
            break;
    }

    /* none of the labels contains a conversion, so a plain write suffices */
    if (label != NULL) {
        fputs(label, output);
    }
}
/**
 * xmlRegPrintRange:
 * @output:  the stream receiving the dump
 * @range:  the range to describe, dereferenced without a NULL check
 *
 * Dumps one range of a character group: the negation flag, the range
 * type, then its start and end values printed with %c.
 */
static void
xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
    fputs(" range: ", output);
    if (range->neg) {
        fputs(" negative ", output);
    }
    xmlRegPrintAtomType(output, range->type);
    fprintf(output, " %c - %c \n ", range->start, range->end);
}
/**
 * xmlRegPrintAtom:
 * @output:  the stream receiving the dump
 * @atom:  the atom to describe, may be NULL
 *
 * Dumps one atom: negation flag, type, quantifier (followed by its
 * bounds for a range quantifier), the string value for string atoms,
 * and finally the type specific details.
 */
static void
xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
    int idx;

    fputs(" atom: ", output);
    if (atom == NULL) {
        fputs(" NULL \n ", output);
        return;
    }

    if (atom->neg) {
        fputs(" not ", output);
    }
    xmlRegPrintAtomType(output, atom->type);
    xmlRegPrintQuantType(output, atom->quant);
    if (atom->quant == XML_REGEXP_QUANT_RANGE) {
        fprintf(output, " %d-%d ", atom->min, atom->max);
    }
    if (atom->type == XML_REGEXP_STRING) {
        fprintf(output, " '%s' ", (char *) atom->valuep);
    }

    /* every branch terminates the line started above */
    switch (atom->type) {
        case XML_REGEXP_CHARVAL:
            fprintf(output, " char %c \n ", atom->codepoint);
            break;
        case XML_REGEXP_RANGES:
            fprintf(output, " %d entries \n ", atom->nbRanges);
            for (idx = 0; idx < atom->nbRanges; idx++) {
                xmlRegPrintRange(output, atom->ranges[idx]);
            }
            break;
        case XML_REGEXP_SUBREG:
            fprintf(output, " start %d end %d \n ", atom->start->no,
                    atom->stop->no);
            break;
        default:
            fputs(" \n ", output);
            break;
    }
}
/**
 * xmlRegPrintTrans:
 * @output:  the stream receiving the dump
 * @trans:  the transition to describe, may be NULL
 *
 * Dumps one transition. A negative target is reported as removed;
 * otherwise the determinism flag, counter, count and either the epsilon
 * target or the atom number and target are printed.
 */
static void
xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
    fputs(" trans: ", output);
    if (trans == NULL) {
        fputs(" NULL \n ", output);
        return;
    }
    if (trans->to < 0) {
        fputs(" removed \n ", output);
        return;
    }

    if (trans->nd == 2) {
        fputs(" last not determinist, ", output);
    } else if (trans->nd != 0) {
        fputs(" not determinist, ", output);
    }

    if (trans->counter >= 0) {
        fprintf(output, " counted %d, ", trans->counter);
    }

    if (trans->count == REGEXP_ALL_COUNTER) {
        fputs(" all transition, ", output);
    } else if (trans->count >= 0) {
        fprintf(output, " count based %d, ", trans->count);
    }

    /* a transition without atom is an epsilon transition */
    if (trans->atom == NULL) {
        fprintf(output, " epsilon to %d \n ", trans->to);
        return;
    }

    if (trans->atom->type == XML_REGEXP_CHARVAL) {
        fprintf(output, " char %c ", trans->atom->codepoint);
    }
    fprintf(output, " atom %d, to %d \n ", trans->atom->no, trans->to);
}
2012-09-11 09:26:36 +04:00
2002-04-16 19:50:10 +04:00
/**
 * xmlRegPrintState:
 * @output:  the stream receiving the dump
 * @state:  the state to describe, may be NULL
 *
 * Dumps one state: a START or FINAL tag when applicable, its number,
 * its transition count and then each of its transitions.
 */
static void
xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
    int idx;

    fputs(" state: ", output);
    if (state == NULL) {
        fputs(" NULL \n ", output);
        return;
    }

    switch (state->type) {
        case XML_REGEXP_START_STATE:
            fputs(" START ", output);
            break;
        case XML_REGEXP_FINAL_STATE:
            fputs(" FINAL ", output);
            break;
        default:
            break;
    }

    fprintf(output, " %d, %d transitions: \n ", state->no, state->nbTrans);
    for (idx = 0; idx < state->nbTrans; idx++) {
        xmlRegPrintTrans(output, state->trans + idx);
    }
}
/************************************************************************
2012-09-11 09:26:36 +04:00
* *
2002-04-16 19:50:10 +04:00
* Finite Automata structures manipulations *
2012-09-11 09:26:36 +04:00
* *
2002-04-16 19:50:10 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2023-02-17 17:40:32 +03:00
static xmlRegRangePtr
2002-04-16 19:50:10 +04:00
xmlRegAtomAddRange ( xmlRegParserCtxtPtr ctxt , xmlRegAtomPtr atom ,
int neg , xmlRegAtomType type , int start , int end ,
xmlChar * blockName ) {
xmlRegRangePtr range ;
if ( atom = = NULL ) {
ERROR ( " add range: atom is NULL " ) ;
2023-02-17 17:40:32 +03:00
return ( NULL ) ;
2002-04-16 19:50:10 +04:00
}
if ( atom - > type ! = XML_REGEXP_RANGES ) {
ERROR ( " add range: atom is not ranges " ) ;
2023-02-17 17:40:32 +03:00
return ( NULL ) ;
2002-04-16 19:50:10 +04:00
}
if ( atom - > maxRanges = = 0 ) {
atom - > maxRanges = 4 ;
atom - > ranges = ( xmlRegRangePtr * ) xmlMalloc ( atom - > maxRanges *
sizeof ( xmlRegRangePtr ) ) ;
if ( atom - > ranges = = NULL ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2002-04-16 19:50:10 +04:00
atom - > maxRanges = 0 ;
2023-02-17 17:40:32 +03:00
return ( NULL ) ;
2002-04-16 19:50:10 +04:00
}
} else if ( atom - > nbRanges > = atom - > maxRanges ) {
xmlRegRangePtr * tmp ;
atom - > maxRanges * = 2 ;
tmp = ( xmlRegRangePtr * ) xmlRealloc ( atom - > ranges , atom - > maxRanges *
sizeof ( xmlRegRangePtr ) ) ;
if ( tmp = = NULL ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2002-04-16 19:50:10 +04:00
atom - > maxRanges / = 2 ;
2023-02-17 17:40:32 +03:00
return ( NULL ) ;
2002-04-16 19:50:10 +04:00
}
atom - > ranges = tmp ;
}
range = xmlRegNewRange ( ctxt , neg , type , start , end ) ;
if ( range = = NULL )
2023-02-17 17:40:32 +03:00
return ( NULL ) ;
2002-04-16 19:50:10 +04:00
range - > blockName = blockName ;
atom - > ranges [ atom - > nbRanges + + ] = range ;
2012-09-11 09:26:36 +04:00
2023-02-17 17:40:32 +03:00
return ( range ) ;
2002-04-16 19:50:10 +04:00
}
/**
 * xmlRegGetCounter:
 * @ctxt:  a regexp parser context
 *
 * Reserves the next counter slot of @ctxt and initialises its min and
 * max to -1. The counter table starts with room for 4 entries and
 * doubles whenever it is full.
 *
 * Returns the index of the new counter or -1 on allocation failure.
 */
static int
xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
    xmlRegCounter *slot;
    int index;

    if (ctxt->maxCounters == 0) {
        ctxt->counters = (xmlRegCounter *)
                         xmlMalloc(4 * sizeof(xmlRegCounter));
        if (ctxt->counters == NULL) {
            xmlRegexpErrMemory(ctxt);
            return(-1);
        }
        ctxt->maxCounters = 4;
    } else if (ctxt->nbCounters >= ctxt->maxCounters) {
        int grown = ctxt->maxCounters * 2;
        xmlRegCounter *bigger;

        bigger = (xmlRegCounter *)
                 xmlRealloc(ctxt->counters, grown * sizeof(xmlRegCounter));
        if (bigger == NULL) {
            /* keep the previous table and capacity */
            xmlRegexpErrMemory(ctxt);
            return(-1);
        }
        ctxt->counters = bigger;
        ctxt->maxCounters = grown;
    }

    index = ctxt->nbCounters;
    slot = &ctxt->counters[index];
    slot->min = -1;
    slot->max = -1;
    ctxt->nbCounters = index + 1;
    return(index);
}
2012-09-11 09:26:36 +04:00
static int
2002-04-16 19:50:10 +04:00
xmlRegAtomPush ( xmlRegParserCtxtPtr ctxt , xmlRegAtomPtr atom ) {
if ( atom = = NULL ) {
ERROR ( " atom push: atom is NULL " ) ;
2003-04-24 20:06:47 +04:00
return ( - 1 ) ;
2002-04-16 19:50:10 +04:00
}
2023-02-17 17:19:37 +03:00
if ( ctxt - > nbAtoms > = ctxt - > maxAtoms ) {
size_t newSize = ctxt - > maxAtoms ? ctxt - > maxAtoms * 2 : 4 ;
2002-04-16 19:50:10 +04:00
xmlRegAtomPtr * tmp ;
2023-02-17 17:19:37 +03:00
tmp = xmlRealloc ( ctxt - > atoms , newSize * sizeof ( xmlRegAtomPtr ) ) ;
2002-04-16 19:50:10 +04:00
if ( tmp = = NULL ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2003-04-24 20:06:47 +04:00
return ( - 1 ) ;
2002-04-16 19:50:10 +04:00
}
ctxt - > atoms = tmp ;
2023-02-17 17:19:37 +03:00
ctxt - > maxAtoms = newSize ;
2002-04-16 19:50:10 +04:00
}
atom - > no = ctxt - > nbAtoms ;
ctxt - > atoms [ ctxt - > nbAtoms + + ] = atom ;
2003-04-24 20:06:47 +04:00
return ( 0 ) ;
2002-04-16 19:50:10 +04:00
}
2012-09-11 09:26:36 +04:00
static void
2005-07-30 17:18:24 +04:00
xmlRegStateAddTransTo ( xmlRegParserCtxtPtr ctxt , xmlRegStatePtr target ,
int from ) {
if ( target - > maxTransTo = = 0 ) {
target - > maxTransTo = 8 ;
target - > transTo = ( int * ) xmlMalloc ( target - > maxTransTo *
sizeof ( int ) ) ;
if ( target - > transTo = = NULL ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2005-07-30 17:18:24 +04:00
target - > maxTransTo = 0 ;
return ;
}
} else if ( target - > nbTransTo > = target - > maxTransTo ) {
int * tmp ;
target - > maxTransTo * = 2 ;
tmp = ( int * ) xmlRealloc ( target - > transTo , target - > maxTransTo *
sizeof ( int ) ) ;
if ( tmp = = NULL ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2005-07-30 17:18:24 +04:00
target - > maxTransTo / = 2 ;
return ;
}
target - > transTo = tmp ;
}
target - > transTo [ target - > nbTransTo ] = from ;
target - > nbTransTo + + ;
}
2012-09-11 09:26:36 +04:00
static void
2002-04-16 19:50:10 +04:00
xmlRegStateAddTrans ( xmlRegParserCtxtPtr ctxt , xmlRegStatePtr state ,
xmlRegAtomPtr atom , xmlRegStatePtr target ,
2005-09-26 21:18:17 +04:00
int counter , int count ) {
2004-05-10 11:52:15 +04:00
int nrtrans ;
2002-04-16 19:50:10 +04:00
if ( state = = NULL ) {
ERROR ( " add state: state is NULL " ) ;
return ;
}
if ( target = = NULL ) {
ERROR ( " add state: target is NULL " ) ;
return ;
}
2004-05-10 11:52:15 +04:00
/*
* Other routines follow the philosophy ' When in doubt , add a transition '
* so we check here whether such a transition is already present and , if
* so , silently ignore this request .
*/
2005-09-26 21:18:17 +04:00
for ( nrtrans = state - > nbTrans - 1 ; nrtrans > = 0 ; nrtrans - - ) {
xmlRegTransPtr trans = & ( state - > trans [ nrtrans ] ) ;
if ( ( trans - > atom = = atom ) & &
( trans - > to = = target - > no ) & &
( trans - > counter = = counter ) & &
( trans - > count = = count ) ) {
return ;
2005-07-30 17:18:24 +04:00
}
2004-05-10 11:52:15 +04:00
}
2002-04-16 19:50:10 +04:00
if ( state - > maxTrans = = 0 ) {
2005-07-30 17:18:24 +04:00
state - > maxTrans = 8 ;
2002-04-16 19:50:10 +04:00
state - > trans = ( xmlRegTrans * ) xmlMalloc ( state - > maxTrans *
sizeof ( xmlRegTrans ) ) ;
if ( state - > trans = = NULL ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2002-04-16 19:50:10 +04:00
state - > maxTrans = 0 ;
return ;
}
} else if ( state - > nbTrans > = state - > maxTrans ) {
xmlRegTrans * tmp ;
state - > maxTrans * = 2 ;
tmp = ( xmlRegTrans * ) xmlRealloc ( state - > trans , state - > maxTrans *
sizeof ( xmlRegTrans ) ) ;
if ( tmp = = NULL ) {
2023-12-18 23:39:51 +03:00
xmlRegexpErrMemory ( ctxt ) ;
2002-04-16 19:50:10 +04:00
state - > maxTrans / = 2 ;
return ;
}
state - > trans = tmp ;
}
state - > trans [ state - > nbTrans ] . atom = atom ;
state - > trans [ state - > nbTrans ] . to = target - > no ;
state - > trans [ state - > nbTrans ] . counter = counter ;
state - > trans [ state - > nbTrans ] . count = count ;
2005-10-18 23:11:55 +04:00
state - > trans [ state - > nbTrans ] . nd = 0 ;
2002-04-16 19:50:10 +04:00
state - > nbTrans + + ;
2005-07-30 17:18:24 +04:00
xmlRegStateAddTransTo ( ctxt , target , state - > no ) ;
2002-04-16 19:50:10 +04:00
}
2023-02-15 03:00:03 +03:00
/**
 * xmlRegStatePush:
 * @ctxt:  a regexp parser context
 *
 * Creates a new state with xmlRegNewState, appends it to the state
 * table of @ctxt and numbers it with its index. The table starts with
 * room for 4 entries and doubles whenever it is full.
 *
 * Returns the new state or NULL if the table or the state cannot be
 * allocated.
 */
static xmlRegStatePtr
xmlRegStatePush(xmlRegParserCtxtPtr ctxt) {
    xmlRegStatePtr created;

    if (ctxt->nbStates >= ctxt->maxStates) {
        xmlRegStatePtr *bigger;
        size_t wanted;

        if (ctxt->maxStates) {
            wanted = ctxt->maxStates * 2;
        } else {
            wanted = 4;
        }
        bigger = xmlRealloc(ctxt->states, wanted * sizeof(bigger[0]));
        if (bigger == NULL) {
            xmlRegexpErrMemory(ctxt);
            return(NULL);
        }
        ctxt->maxStates = wanted;
        ctxt->states = bigger;
    }

    /* the table is grown first, so a state is never created without a slot */
    created = xmlRegNewState(ctxt);
    if (created == NULL) {
        return(NULL);
    }

    created->no = ctxt->nbStates;
    ctxt->states[ctxt->nbStates] = created;
    ctxt->nbStates++;
    return(created);
}
2002-04-20 10:41:40 +04:00
/**
* xmlFAGenerateAllTransition :
2002-04-20 21:38:48 +04:00
* @ ctxt : a regexp parser context
* @ from : the from state
* @ to : the target state or NULL for building a new one
* @ lax :
2002-04-20 10:41:40 +04:00
*
*/
2023-02-15 03:00:03 +03:00
static int
2002-04-20 10:41:40 +04:00
xmlFAGenerateAllTransition ( xmlRegParserCtxtPtr ctxt ,
2002-04-20 21:38:48 +04:00
xmlRegStatePtr from , xmlRegStatePtr to ,
int lax ) {
2002-04-20 10:41:40 +04:00
if ( to = = NULL ) {
2023-02-15 03:00:03 +03:00
to = xmlRegStatePush ( ctxt ) ;
if ( to = = NULL )
return ( - 1 ) ;
2002-04-20 10:41:40 +04:00
ctxt - > state = to ;
}
2002-04-20 21:38:48 +04:00
if ( lax )
2005-09-26 21:18:17 +04:00
xmlRegStateAddTrans ( ctxt , from , NULL , to , - 1 , REGEXP_ALL_LAX_COUNTER ) ;
2002-04-20 21:38:48 +04:00
else
2005-09-26 21:18:17 +04:00
xmlRegStateAddTrans ( ctxt , from , NULL , to , - 1 , REGEXP_ALL_COUNTER ) ;
2023-02-15 03:00:03 +03:00
return ( 0 ) ;
2002-04-20 10:41:40 +04:00
}
2002-04-16 19:50:10 +04:00
/**
* xmlFAGenerateEpsilonTransition :
2002-04-20 21:38:48 +04:00
* @ ctxt : a regexp parser context
* @ from : the from state
* @ to : the target state or NULL for building a new one
2002-04-16 19:50:10 +04:00
*
*/
2023-02-15 03:00:03 +03:00
static int
2002-04-16 19:50:10 +04:00
xmlFAGenerateEpsilonTransition ( xmlRegParserCtxtPtr ctxt ,
xmlRegStatePtr from , xmlRegStatePtr to ) {
if ( to = = NULL ) {
2023-02-15 03:00:03 +03:00
to = xmlRegStatePush ( ctxt ) ;
if ( to = = NULL )
return ( - 1 ) ;
2002-04-16 19:50:10 +04:00
ctxt - > state = to ;
}
2005-09-26 21:18:17 +04:00
xmlRegStateAddTrans ( ctxt , from , NULL , to , - 1 , - 1 ) ;
2023-02-15 03:00:03 +03:00
return ( 0 ) ;
2002-04-16 19:50:10 +04:00
}
/**
* xmlFAGenerateCountedEpsilonTransition :
2002-04-20 21:38:48 +04:00
* @ ctxt : a regexp parser context
* @ from : the from state
* @ to : the target state or NULL for building a new one
2002-04-16 19:50:10 +04:00
* counter : the counter for that transition
*
*/
2023-02-15 03:00:03 +03:00
static int
2002-04-16 19:50:10 +04:00
xmlFAGenerateCountedEpsilonTransition ( xmlRegParserCtxtPtr ctxt ,
xmlRegStatePtr from , xmlRegStatePtr to , int counter ) {
if ( to = = NULL ) {
2023-02-15 03:00:03 +03:00
to = xmlRegStatePush ( ctxt ) ;
if ( to = = NULL )
return ( - 1 ) ;
2002-04-16 19:50:10 +04:00
ctxt - > state = to ;
}
2005-09-26 21:18:17 +04:00
xmlRegStateAddTrans ( ctxt , from , NULL , to , counter , - 1 ) ;
2023-02-15 03:00:03 +03:00
return ( 0 ) ;
2002-04-16 19:50:10 +04:00
}
/**
* xmlFAGenerateCountedTransition :
2002-04-20 21:38:48 +04:00
* @ ctxt : a regexp parser context
* @ from : the from state
* @ to : the target state or NULL for building a new one
2002-04-16 19:50:10 +04:00
* counter : the counter for that transition
*
*/
2023-02-15 03:00:03 +03:00
static int
2002-04-16 19:50:10 +04:00
xmlFAGenerateCountedTransition ( xmlRegParserCtxtPtr ctxt ,
xmlRegStatePtr from , xmlRegStatePtr to , int counter ) {
if ( to = = NULL ) {
2023-02-15 03:00:03 +03:00
to = xmlRegStatePush ( ctxt ) ;
if ( to = = NULL )
return ( - 1 ) ;
2002-04-16 19:50:10 +04:00
ctxt - > state = to ;
}
2005-09-26 21:18:17 +04:00
xmlRegStateAddTrans ( ctxt , from , NULL , to , - 1 , counter ) ;
2023-02-15 03:00:03 +03:00
return ( 0 ) ;
2002-04-16 19:50:10 +04:00
}
/**
* xmlFAGenerateTransitions :
2002-04-20 21:38:48 +04:00
* @ ctxt : a regexp parser context
* @ from : the from state
* @ to : the target state or NULL for building a new one
* @ atom : the atom generating the transition
2002-04-16 19:50:10 +04:00
*
2004-05-06 08:17:26 +04:00
* Returns 0 if success and - 1 in case of error .
2002-04-16 19:50:10 +04:00
*/
2003-04-24 20:06:47 +04:00
static int
2002-04-16 19:50:10 +04:00
xmlFAGenerateTransitions ( xmlRegParserCtxtPtr ctxt , xmlRegStatePtr from ,
xmlRegStatePtr to , xmlRegAtomPtr atom ) {
2008-03-13 10:27:24 +03:00
xmlRegStatePtr end ;
2016-05-09 04:28:38 +03:00
int nullable = 0 ;
2008-03-13 10:27:24 +03:00
2002-04-16 19:50:10 +04:00
if ( atom = = NULL ) {
2019-09-30 18:04:54 +03:00
ERROR ( " generate transition: atom == NULL " ) ;
2003-04-24 20:06:47 +04:00
return ( - 1 ) ;
2002-04-16 19:50:10 +04:00
}
if ( atom - > type = = XML_REGEXP_SUBREG ) {
/*
* this is a subexpression handling one should not need to
2004-05-06 08:17:26 +04:00
* create a new node except for XML_REGEXP_QUANT_RANGE .
2002-04-16 19:50:10 +04:00
*/
if ( ( to ! = NULL ) & & ( atom - > stop ! = to ) & &
( atom - > quant ! = XML_REGEXP_QUANT_RANGE ) ) {
/*
* Generate an epsilon transition to link to the target
*/
xmlFAGenerateEpsilonTransition ( ctxt , atom - > stop , to ) ;
2005-10-20 19:55:25 +04:00
# ifdef DV
2012-09-11 09:26:36 +04:00
} else if ( ( to = = NULL ) & & ( atom - > quant ! = XML_REGEXP_QUANT_RANGE ) & &
2005-10-20 19:55:25 +04:00
( atom - > quant ! = XML_REGEXP_QUANT_ONCE ) ) {
2023-02-15 03:00:03 +03:00
to = xmlRegStatePush ( ctxt , to ) ;
if ( to = = NULL )
return ( - 1 ) ;
2005-10-20 19:55:25 +04:00
ctxt - > state = to ;
xmlFAGenerateEpsilonTransition ( ctxt , atom - > stop , to ) ;
# endif
2002-04-16 19:50:10 +04:00
}
switch ( atom - > quant ) {
case XML_REGEXP_QUANT_OPT :
atom - > quant = XML_REGEXP_QUANT_ONCE ;
2006-03-22 02:17:57 +03:00
/*
* transition done to the state after end of atom .
* 1. set transition from atom start to new state
2012-09-11 09:26:36 +04:00
* 2. set transition from atom end to this state .
2006-03-22 02:17:57 +03:00
*/
2009-08-22 20:56:01 +04:00
if ( to = = NULL ) {
xmlFAGenerateEpsilonTransition ( ctxt , atom - > start , 0 ) ;
xmlFAGenerateEpsilonTransition ( ctxt , atom - > stop ,
ctxt - > state ) ;
} else {
xmlFAGenerateEpsilonTransition ( ctxt , atom - > start , to ) ;
}
2002-04-16 19:50:10 +04:00
break ;
case XML_REGEXP_QUANT_MULT :
atom - > quant = XML_REGEXP_QUANT_ONCE ;
xmlFAGenerateEpsilonTransition ( ctxt , atom - > start , atom - > stop ) ;
xmlFAGenerateEpsilonTransition ( ctxt , atom - > stop , atom - > start ) ;
break ;
case XML_REGEXP_QUANT_PLUS :
atom - > quant = XML_REGEXP_QUANT_ONCE ;
xmlFAGenerateEpsilonTransition ( ctxt , atom - > stop , atom - > start ) ;
break ;
case XML_REGEXP_QUANT_RANGE : {
int counter ;
2007-08-22 20:29:21 +04:00
xmlRegStatePtr inter , newstate ;
2002-04-16 19:50:10 +04:00
/*
2007-08-22 20:29:21 +04:00
* create the final state now if needed
2002-04-16 19:50:10 +04:00
*/
2007-08-22 20:29:21 +04:00
if ( to ! = NULL ) {
newstate = to ;
} else {
2023-02-15 03:00:03 +03:00
newstate = xmlRegStatePush ( ctxt ) ;
if ( newstate = = NULL )
return ( - 1 ) ;
2007-08-22 20:29:21 +04:00
}
/*
* The principle here is to use counted transition
* to avoid explosion in the number of states in the
* graph . This is clearly more complex but should not
* be exploitable at runtime .
*/
if ( ( atom - > min = = 0 ) & & ( atom - > start0 = = NULL ) ) {
xmlRegAtomPtr copy ;
/*
* duplicate a transition based on atom to count next
2019-09-30 18:04:54 +03:00
* occurrences after 1. We cannot loop to atom - > start
2012-09-11 09:26:36 +04:00
* directly because we need an epsilon transition to
2007-08-22 20:29:21 +04:00
* newstate .
*/
/* ???? For some reason it seems we never reach that
case , I suppose this got optimized out before when
building the automata */
2007-08-28 21:33:45 +04:00
copy = xmlRegCopyAtom ( ctxt , atom ) ;
2007-08-22 20:29:21 +04:00
if ( copy = = NULL )
return ( - 1 ) ;
copy - > quant = XML_REGEXP_QUANT_ONCE ;
copy - > min = 0 ;
copy - > max = 0 ;
if ( xmlFAGenerateTransitions ( ctxt , atom - > start , NULL , copy )
2023-02-17 17:20:33 +03:00
< 0 ) {
xmlRegFreeAtom ( copy ) ;
2007-08-22 20:29:21 +04:00
return ( - 1 ) ;
2023-02-17 17:20:33 +03:00
}
2007-08-22 20:29:21 +04:00
inter = ctxt - > state ;
counter = xmlRegGetCounter ( ctxt ) ;
2023-02-17 17:53:07 +03:00
if ( counter < 0 )
return ( - 1 ) ;
2007-08-22 20:29:21 +04:00
ctxt - > counters [ counter ] . min = atom - > min - 1 ;
ctxt - > counters [ counter ] . max = atom - > max - 1 ;
/* count the number of times we see it again */
xmlFAGenerateCountedEpsilonTransition ( ctxt , inter ,
atom - > stop , counter ) ;
/* allow a way out based on the count */
xmlFAGenerateCountedTransition ( ctxt , inter ,
newstate , counter ) ;
/* and also allow a direct exit for 0 */
2004-05-06 08:17:26 +04:00
xmlFAGenerateEpsilonTransition ( ctxt , atom - > start ,
2007-08-22 20:29:21 +04:00
newstate ) ;
} else {
/*
* either we need the atom at least once or there
2019-09-30 18:04:54 +03:00
* is an atom - > start0 allowing to easily plug the
2007-08-22 20:29:21 +04:00
* epsilon transition .
*/
counter = xmlRegGetCounter ( ctxt ) ;
2023-02-17 17:53:07 +03:00
if ( counter < 0 )
return ( - 1 ) ;
2007-08-22 20:29:21 +04:00
ctxt - > counters [ counter ] . min = atom - > min - 1 ;
ctxt - > counters [ counter ] . max = atom - > max - 1 ;
/* allow a way out based on the count */
xmlFAGenerateCountedTransition ( ctxt , atom - > stop ,
newstate , counter ) ;
2021-12-20 02:31:41 +03:00
/* count the number of times we see it again */
xmlFAGenerateCountedEpsilonTransition ( ctxt , atom - > stop ,
atom - > start , counter ) ;
2007-08-22 20:29:21 +04:00
/* and if needed allow a direct exit for 0 */
if ( atom - > min = = 0 )
xmlFAGenerateEpsilonTransition ( ctxt , atom - > start0 ,
newstate ) ;
2004-05-06 08:17:26 +04:00
}
2002-04-16 19:50:10 +04:00
atom - > min = 0 ;
atom - > max = 0 ;
atom - > quant = XML_REGEXP_QUANT_ONCE ;
2005-11-09 11:56:26 +03:00
ctxt - > state = newstate ;
2002-04-16 19:50:10 +04:00
}
default :
break ;
}
2023-02-17 17:20:33 +03:00
if ( xmlRegAtomPush ( ctxt , atom ) < 0 )
return ( - 1 ) ;
2003-04-24 20:06:47 +04:00
return ( 0 ) ;
2012-09-11 09:26:36 +04:00
}
2005-10-18 23:11:55 +04:00
if ( ( atom - > min = = 0 ) & & ( atom - > max = = 0 ) & &
2005-07-14 16:58:49 +04:00
( atom - > quant = = XML_REGEXP_QUANT_RANGE ) ) {
/*
* we can discard the atom and generate an epsilon transition instead
*/
if ( to = = NULL ) {
2023-02-15 03:00:03 +03:00
to = xmlRegStatePush ( ctxt ) ;
if ( to = = NULL )
2005-07-14 16:58:49 +04:00
return ( - 1 ) ;
}
xmlFAGenerateEpsilonTransition ( ctxt , from , to ) ;
ctxt - > state = to ;
xmlRegFreeAtom ( atom ) ;
return ( 0 ) ;
2005-10-18 23:11:55 +04:00
}
if ( to = = NULL ) {
2023-02-15 03:00:03 +03:00
to = xmlRegStatePush ( ctxt ) ;
if ( to = = NULL )
2003-04-24 20:06:47 +04:00
return ( - 1 ) ;
2012-09-11 09:26:36 +04:00
}
2008-03-13 10:27:24 +03:00
end = to ;
2012-09-11 09:26:36 +04:00
if ( ( atom - > quant = = XML_REGEXP_QUANT_MULT ) | |
2008-03-13 10:27:24 +03:00
( atom - > quant = = XML_REGEXP_QUANT_PLUS ) ) {
/*
* Do not pollute the target state by adding transitions from
* it as it is likely to be the shared target of multiple branches .
* So isolate with an epsilon transition .
*/
xmlRegStatePtr tmp ;
2012-09-11 09:26:36 +04:00
2023-02-15 03:00:03 +03:00
tmp = xmlRegStatePush ( ctxt ) ;
if ( tmp = = NULL )
2008-03-13 10:27:24 +03:00
return ( - 1 ) ;
xmlFAGenerateEpsilonTransition ( ctxt , tmp , to ) ;
to = tmp ;
2002-04-16 19:50:10 +04:00
}
2016-05-09 04:28:38 +03:00
if ( ( atom - > quant = = XML_REGEXP_QUANT_RANGE ) & &
( atom - > min = = 0 ) & & ( atom - > max > 0 ) ) {
nullable = 1 ;
atom - > min = 1 ;
if ( atom - > max = = 1 )
atom - > quant = XML_REGEXP_QUANT_OPT ;
}
2005-10-18 23:11:55 +04:00
xmlRegStateAddTrans ( ctxt , from , atom , to , - 1 , - 1 ) ;
2008-03-13 10:27:24 +03:00
ctxt - > state = end ;
2002-04-16 19:50:10 +04:00
switch ( atom - > quant ) {
case XML_REGEXP_QUANT_OPT :
atom - > quant = XML_REGEXP_QUANT_ONCE ;
xmlFAGenerateEpsilonTransition ( ctxt , from , to ) ;
break ;
case XML_REGEXP_QUANT_MULT :
atom - > quant = XML_REGEXP_QUANT_ONCE ;
xmlFAGenerateEpsilonTransition ( ctxt , from , to ) ;
2005-09-26 21:18:17 +04:00
xmlRegStateAddTrans ( ctxt , to , atom , to , - 1 , - 1 ) ;
2002-04-16 19:50:10 +04:00
break ;
case XML_REGEXP_QUANT_PLUS :
atom - > quant = XML_REGEXP_QUANT_ONCE ;
2005-09-26 21:18:17 +04:00
xmlRegStateAddTrans ( ctxt , to , atom , to , - 1 , - 1 ) ;
2002-04-16 19:50:10 +04:00
break ;
2012-09-11 09:26:36 +04:00
case XML_REGEXP_QUANT_RANGE :
2016-05-09 04:28:38 +03:00
if ( nullable )
2007-04-11 18:33:46 +04:00
xmlFAGenerateEpsilonTransition ( ctxt , from , to ) ;
break ;
2002-04-16 19:50:10 +04:00
default :
break ;
}
2023-02-17 17:20:33 +03:00
if ( xmlRegAtomPush ( ctxt , atom ) < 0 )
return ( - 1 ) ;
2003-04-24 20:06:47 +04:00
return ( 0 ) ;
2002-04-16 19:50:10 +04:00
}
/**
 * xmlFAReduceEpsilonTransitions:
 * @ctxt:  a regexp parser context
 * @fromnr:  the from state
 * @tonr:  the to state
 * @counter:  counter given to the copied atom transitions whose own
 *            counter is negative
 *
 * Copies onto state @fromnr the transitions of state @tonr, recursing
 * through the atom-less transitions of @tonr that carry no count. Each
 * state visited is marked XML_REGEXP_MARK_VISITED so that it is handled
 * only once; a final @tonr makes @fromnr final as well.
 */
static void
xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
                              int tonr, int counter) {
    xmlRegStatePtr src;
    xmlRegStatePtr via;
    int idx;

    src = ctxt->states[fromnr];
    if (src == NULL)
        return;
    via = ctxt->states[tonr];
    if (via == NULL)
        return;

    switch (via->mark) {
        case XML_REGEXP_MARK_START:
        case XML_REGEXP_MARK_VISITED:
            return;
        default:
            break;
    }
    via->mark = XML_REGEXP_MARK_VISITED;

    if (via->type == XML_REGEXP_FINAL_STATE)
        src->type = XML_REGEXP_FINAL_STATE;

    for (idx = 0; idx < via->nbTrans; idx++) {
        xmlRegTransPtr edge = &via->trans[idx];
        int effective;

        /* removed transition */
        if (edge->to < 0)
            continue;

        /* the counter of the transition itself takes precedence */
        effective = (edge->counter >= 0) ? edge->counter : counter;

        if (edge->atom != NULL) {
            xmlRegStateAddTrans(ctxt, src, edge->atom,
                                ctxt->states[edge->to], effective, -1);
            continue;
        }

        /*
         * Don't remove counted transitions
         * Don't loop either
         */
        if (edge->to == fromnr)
            continue;
        if (edge->count >= 0) {
            xmlRegStateAddTrans(ctxt, src, NULL, ctxt->states[edge->to],
                                -1, edge->count);
        } else {
            xmlFAReduceEpsilonTransitions(ctxt, fromnr, edge->to,
                                          effective);
        }
    }
}
/**
 * xmlFAFinishReduceEpsilonTransitions:
 * @ctxt:  a regexp parser context
 * @tonr:  the number of the state to start from
 *
 * Sets the mark of state @tonr back to XML_REGEXP_MARK_NORMAL, then does
 * the same for every state reachable from it through atom-less
 * transitions. States marked XML_REGEXP_MARK_START or already
 * XML_REGEXP_MARK_NORMAL stop the walk.
 */
static void
xmlFAFinishReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int tonr) {
    xmlRegStatePtr node = ctxt->states[tonr];
    int idx;

    if (node == NULL)
        return;

    switch (node->mark) {
        case XML_REGEXP_MARK_START:
        case XML_REGEXP_MARK_NORMAL:
            return;
        default:
            break;
    }
    node->mark = XML_REGEXP_MARK_NORMAL;

    for (idx = 0; idx < node->nbTrans; idx++) {
        xmlRegTransPtr edge = &node->trans[idx];

        if ((edge->to < 0) || (edge->atom != NULL))
            continue;
        xmlFAFinishReduceEpsilonTransitions(ctxt, edge->to);
    }
}
2005-07-30 17:18:24 +04:00
/**
 * xmlFAEliminateSimpleEpsilonTransitions:
 * @ctxt:  a regexp parser context
 *
 * Eliminating general epsilon transitions can get costly in the general
 * algorithm due to the large amount of generated new transitions and
 * associated comparisons. However for simple epsilon transitions used
 * just to separate building blocks when generating the automata this
 * can be reduced to state elimination:
 *    - if there exists an epsilon from X to Y
 *    - if there is no other transition from X
 * then X and Y are semantically equivalent and X can be eliminated:
 * every transition targeting X is replaced by a transition to Y and X is
 * flagged XML_REGEXP_UNREACH_STATE.
 *
 * If X is the start state it is left untouched.
 *
 * If X is a final state, skip it.
 * Otherwise it would be necessary to manipulate counters for this case
 * when eliminating state 2:
 * State 1 has a transition with an atom to state 2.
 * State 2 is final and has an epsilon transition to state 1.
 */
static void
xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
    int statenr, i, j, newto;
    xmlRegStatePtr state, tmp;

    for (statenr = 0; statenr < ctxt->nbStates; statenr++) {
        state = ctxt->states[statenr];
        if (state == NULL)
            continue;
        if (state->nbTrans != 1)
            continue;
        if ((state->type == XML_REGEXP_UNREACH_STATE) ||
            (state->type == XML_REGEXP_FINAL_STATE))
            continue;
        /* is the only transition out a basic transition */
        if ((state->trans[0].atom != NULL) ||
            (state->trans[0].to < 0) ||
            (state->trans[0].to == statenr) ||
            (state->trans[0].counter >= 0) ||
            (state->trans[0].count >= 0))
            continue;
        /* the start state is kept as is */
        if (state->type == XML_REGEXP_START_STATE)
            continue;

        newto = state->trans[0].to;

        /* redirect every transition entering this state to its successor */
        for (i = 0; i < state->nbTransTo; i++) {
            tmp = ctxt->states[state->transTo[i]];
            /*
             * tmp->trans is indexed afresh on each access because
             * xmlRegStateAddTrans may reallocate the table.
             */
            for (j = 0; j < tmp->nbTrans; j++) {
                if (tmp->trans[j].to != statenr)
                    continue;
                tmp->trans[j].to = -1;
                xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
                                    ctxt->states[newto],
                                    tmp->trans[j].counter,
                                    tmp->trans[j].count);
            }
        }

        /*
         * No final-state propagation is needed here: final states were
         * already skipped by the type test above.
         */

        /* eliminate the transition completely */
        state->nbTrans = 0;
        state->type = XML_REGEXP_UNREACH_STATE;
    }
}
2002-04-16 19:50:10 +04:00
/**
* xmlFAEliminateEpsilonTransitions :
2002-04-20 21:38:48 +04:00
* @ ctxt : a regexp parser context
2002-04-16 19:50:10 +04:00
*
*/
static void
xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
    /*
     * Outline of the work done here:
     *  1. drop the simple epsilon transitions and free the states which
     *     that step flagged as XML_REGEXP_UNREACH_STATE;
     *  2. walk the states from the last one back to the first, flag the
     *     sink states and replace every uncounted epsilon transition by
     *     the transitions it leads to;
     *  3. cancel the epsilon transitions once they have been bypassed;
     *  4. mark everything reachable from state 0 and free the rest.
     */
    xmlRegStatePtr cur;
    int idx, t;
    int bypassed = 0;

    if (ctxt->states == NULL)
        return;

    /* Step 1: simple epsilon transitions and the states they orphan. */
    xmlFAEliminateSimpleEpsilonTransitions(ctxt);
    for (idx = 0; idx < ctxt->nbStates; idx++) {
        cur = ctxt->states[idx];
        if (cur == NULL)
            continue;
        if (cur->type != XML_REGEXP_UNREACH_STATE)
            continue;
        xmlRegFreeState(cur);
        ctxt->states[idx] = NULL;
    }

    /*
     * Step 2: going backward keeps the recursion and the number of
     * transition compares low when epsilon transitions cascade.
     * The marking on the state is what protects against loops.
     */
    for (idx = ctxt->nbStates - 1; idx >= 0; idx--) {
        cur = ctxt->states[idx];
        if (cur == NULL)
            continue;

        /* a non-final state without any way out is a sink */
        if ((cur->nbTrans == 0) && (cur->type != XML_REGEXP_FINAL_STATE))
            cur->type = XML_REGEXP_SINK_STATE;

        /*
         * cur->trans[] and cur->nbTrans are deliberately re-read on each
         * access instead of being cached: the reduction call is handed
         * this state's number and presumably adds transitions to it
         * (its body is not visible from here).
         */
        for (t = 0; t < cur->nbTrans; t++) {
            int dest;

            if ((cur->trans[t].atom != NULL) || (cur->trans[t].to < 0))
                continue;

            dest = cur->trans[t].to;
            if (dest == idx) {
                /* epsilon loop on the state itself, just cancel it */
                cur->trans[t].to = -1;
                continue;
            }
            if (cur->trans[t].count >= 0)
                continue;

            bypassed = 1;
            cur->trans[t].to = -2;
            cur->mark = XML_REGEXP_MARK_START;
            xmlFAReduceEpsilonTransitions(ctxt, idx, dest,
                                          cur->trans[t].counter);
            xmlFAFinishReduceEpsilonTransitions(ctxt, dest);
            cur->mark = XML_REGEXP_MARK_NORMAL;
        }
    }

    /* Step 3: cancel the remaining uncounted epsilon transitions. */
    if (bypassed) {
        for (idx = 0; idx < ctxt->nbStates; idx++) {
            cur = ctxt->states[idx];
            if (cur == NULL)
                continue;
            for (t = 0; t < cur->nbTrans; t++) {
                xmlRegTransPtr tr = &cur->trans[t];

                if ((tr->atom == NULL) && (tr->count < 0) && (tr->to >= 0))
                    tr->to = -1;
            }
        }
    }

    /* Step 4: reachability sweep starting from state 0. */
    for (idx = 0; idx < ctxt->nbStates; idx++) {
        if (ctxt->states[idx] != NULL)
            ctxt->states[idx]->reached = XML_REGEXP_MARK_NORMAL;
    }
    cur = ctxt->states[0];
    if (cur != NULL)
        cur->reached = XML_REGEXP_MARK_START;
    while (cur != NULL) {
        xmlRegStatePtr next = NULL;

        cur->reached = XML_REGEXP_MARK_VISITED;

        /* queue every state reachable through a live transition */
        for (t = 0; t < cur->nbTrans; t++) {
            xmlRegStatePtr dst;

            if (cur->trans[t].to < 0)
                continue;
            if ((cur->trans[t].atom == NULL) && (cur->trans[t].count < 0))
                continue;
            dst = ctxt->states[cur->trans[t].to];
            if (dst == NULL)
                continue;
            if (dst->reached == XML_REGEXP_MARK_NORMAL) {
                dst->reached = XML_REGEXP_MARK_START;
                next = dst;
            }
        }

        /* nothing new from here: pick any queued, unexplored state */
        if (next == NULL) {
            for (idx = 1; idx < ctxt->nbStates; idx++) {
                xmlRegStatePtr cand = ctxt->states[idx];

                if ((cand != NULL) &&
                    (cand->reached == XML_REGEXP_MARK_START)) {
                    next = cand;
                    break;
                }
            }
        }
        cur = next;
    }

    /* states still flagged NORMAL were never reached */
    for (idx = 0; idx < ctxt->nbStates; idx++) {
        cur = ctxt->states[idx];
        if (cur == NULL)
            continue;
        if (cur->reached != XML_REGEXP_MARK_NORMAL)
            continue;
        xmlRegFreeState(cur);
        ctxt->states[idx] = NULL;
    }
}
2005-10-18 23:11:55 +04:00
/**
 * xmlFACompareRanges:
 * @range1:  a character range
 * @range2:  a character range
 *
 * Checks whether two ranges may accept a common character. The answer is
 * deliberately pessimistic: whenever an exact answer would be too costly
 * the ranges are reported as overlapping.
 *
 * Returns 1 if the ranges may overlap, 0 if they cannot, and -1 if one of
 *         them has a type which can't be compared (RANGES, SUBREG, STRING)
 *         or if the per-codepoint check failed
 */
static int
xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
    xmlRegRangePtr lo = range1;
    xmlRegRangePtr hi = range2;
    int overlap = 0;
    int negDiffers;

    if ((range1->type == XML_REGEXP_RANGES) ||
        (range1->type == XML_REGEXP_SUBREG) ||
        (range1->type == XML_REGEXP_STRING) ||
        (range2->type == XML_REGEXP_RANGES) ||
        (range2->type == XML_REGEXP_SUBREG) ||
        (range2->type == XML_REGEXP_STRING))
        return(-1);

    /* order them so that lo->type <= hi->type */
    if (lo->type > hi->type) {
        lo = range2;
        hi = range1;
    }

    /* true when exactly one of the two ranges is negated */
    negDiffers = ((lo->neg != 0) != (hi->neg != 0));

    if ((lo->type == XML_REGEXP_ANYCHAR) ||
        (hi->type == XML_REGEXP_ANYCHAR)) {
        overlap = 1;
    } else if ((lo->type == XML_REGEXP_EPSILON) ||
               (hi->type == XML_REGEXP_EPSILON)) {
        return(0);
    } else if (lo->type == hi->type) {
        /* only codepoint intervals get an exact answer */
        if (lo->type != XML_REGEXP_CHARVAL)
            overlap = 1;
        else
            overlap = ((lo->end >= hi->start) && (hi->end >= lo->start));
    } else if (lo->type == XML_REGEXP_CHARVAL) {
        int cp;

        /*
         * Try every codepoint of the interval against the other range.
         * Paying that price once at compile time is usually cheaper than
         * pushing extra states over and over while evaluating.
         */
        for (cp = lo->start; cp <= lo->end; cp++) {
            int hit = xmlRegCheckCharacterRange(hi->type, cp, 0,
                                                hi->start, hi->end,
                                                hi->blockName);

            if (hit < 0)
                return(-1);
            if (negDiffers ? (hit == 0) : (hit == 1))
                return(1);
        }
        return(0);
    } else if ((lo->type == XML_REGEXP_BLOCK_NAME) ||
               (hi->type == XML_REGEXP_BLOCK_NAME)) {
        /*
         * The equality below cannot hold at this point since ranges of
         * identical type were dealt with earlier; the test is kept to
         * leave the logic untouched.
         */
        if (lo->type == hi->type) {
            overlap = xmlStrEqual(lo->blockName, hi->blockName);
        } else {
            /*
             * Comparing a block with anything else costs too much time
             * and memory, force the automata to save state instead.
             */
            return(1);
        }
    } else if ((lo->type < XML_REGEXP_LETTER) ||
               (hi->type < XML_REGEXP_LETTER)) {
        /* only a class escape facing its own complement is disjoint */
        int complementary =
            (((lo->type == XML_REGEXP_ANYSPACE) &&
              (hi->type == XML_REGEXP_NOTSPACE)) ||
             ((lo->type == XML_REGEXP_INITNAME) &&
              (hi->type == XML_REGEXP_NOTINITNAME)) ||
             ((lo->type == XML_REGEXP_NAMECHAR) &&
              (hi->type == XML_REGEXP_NOTNAMECHAR)) ||
             ((lo->type == XML_REGEXP_DECIMAL) &&
              (hi->type == XML_REGEXP_NOTDECIMAL)) ||
             ((lo->type == XML_REGEXP_REALCHAR) &&
              (hi->type == XML_REGEXP_NOTREALCHAR)));

        if (!complementary)
            return(1);      /* same thing, limit the complexity */
        overlap = 0;
    } else {
        /*
         * Two different Unicode categories, lo->type < hi->type:
         * they are disjoint unless hi is a subgroup of lo.
         */
        overlap = 0;
        switch (lo->type) {
            case XML_REGEXP_LETTER:
                switch (hi->type) {
                    case XML_REGEXP_LETTER_UPPERCASE:
                    case XML_REGEXP_LETTER_LOWERCASE:
                    case XML_REGEXP_LETTER_TITLECASE:
                    case XML_REGEXP_LETTER_MODIFIER:
                    case XML_REGEXP_LETTER_OTHERS:
                        overlap = 1;
                        break;
                    default:
                        break;
                }
                break;
            case XML_REGEXP_MARK:
                switch (hi->type) {
                    case XML_REGEXP_MARK_NONSPACING:
                    case XML_REGEXP_MARK_SPACECOMBINING:
                    case XML_REGEXP_MARK_ENCLOSING:
                        overlap = 1;
                        break;
                    default:
                        break;
                }
                break;
            case XML_REGEXP_NUMBER:
                switch (hi->type) {
                    case XML_REGEXP_NUMBER_DECIMAL:
                    case XML_REGEXP_NUMBER_LETTER:
                    case XML_REGEXP_NUMBER_OTHERS:
                        overlap = 1;
                        break;
                    default:
                        break;
                }
                break;
            case XML_REGEXP_PUNCT:
                switch (hi->type) {
                    case XML_REGEXP_PUNCT_CONNECTOR:
                    case XML_REGEXP_PUNCT_DASH:
                    case XML_REGEXP_PUNCT_OPEN:
                    case XML_REGEXP_PUNCT_CLOSE:
                    case XML_REGEXP_PUNCT_INITQUOTE:
                    case XML_REGEXP_PUNCT_FINQUOTE:
                    case XML_REGEXP_PUNCT_OTHERS:
                        overlap = 1;
                        break;
                    default:
                        break;
                }
                break;
            case XML_REGEXP_SEPAR:
                switch (hi->type) {
                    case XML_REGEXP_SEPAR_SPACE:
                    case XML_REGEXP_SEPAR_LINE:
                    case XML_REGEXP_SEPAR_PARA:
                        overlap = 1;
                        break;
                    default:
                        break;
                }
                break;
            case XML_REGEXP_SYMBOL:
                switch (hi->type) {
                    case XML_REGEXP_SYMBOL_MATH:
                    case XML_REGEXP_SYMBOL_CURRENCY:
                    case XML_REGEXP_SYMBOL_MODIFIER:
                    case XML_REGEXP_SYMBOL_OTHERS:
                        overlap = 1;
                        break;
                    default:
                        break;
                }
                break;
            case XML_REGEXP_OTHER:
                switch (hi->type) {
                    case XML_REGEXP_OTHER_CONTROL:
                    case XML_REGEXP_OTHER_FORMAT:
                    case XML_REGEXP_OTHER_PRIVATE:
                        overlap = 1;
                        break;
                    default:
                        break;
                }
                break;
            default:
                /* lo is a subgroup: disjoint from any other category */
                if ((hi->type < XML_REGEXP_LETTER) ||
                    (hi->type >= XML_REGEXP_BLOCK_NAME))
                    return(1);      /* safety net ! */
                break;
        }
    }

    if (negDiffers)
        overlap = !overlap;
    return(overlap);
}
2006-02-12 22:14:15 +03:00
/**
 * xmlFACompareAtomTypes:
 * @type1:  an atom type
 * @type2:  an atom type
 *
 * Compares two atom types to check whether they may intersect in some
 * ways, this is used by xmlFACompareAtoms only. Only the types are looked
 * at, so the answer is an approximation which must err on the side of
 * "may intersect".
 *
 * Returns 1 if they may intersect and 0 otherwise
 */
static int
xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
    /* nothing can be said from the type alone for those */
    if ((type1 == XML_REGEXP_EPSILON) ||
        (type1 == XML_REGEXP_CHARVAL) ||
        (type1 == XML_REGEXP_RANGES) ||
        (type1 == XML_REGEXP_SUBREG) ||
        (type1 == XML_REGEXP_STRING) ||
        (type1 == XML_REGEXP_ANYCHAR))
        return(1);
    if ((type2 == XML_REGEXP_EPSILON) ||
        (type2 == XML_REGEXP_CHARVAL) ||
        (type2 == XML_REGEXP_RANGES) ||
        (type2 == XML_REGEXP_SUBREG) ||
        (type2 == XML_REGEXP_STRING) ||
        (type2 == XML_REGEXP_ANYCHAR))
        return(1);
    if (type1 == type2) return(1);

    /* simplify subsequent compares by making sure type1 < type2 */
    if (type1 > type2) {
        xmlRegAtomType tmp = type1;
        type1 = type2;
        type2 = tmp;
    }

    /*
     * NOTE(review): \i accepts '_' and ':' and \c accepts '.', '-', '_',
     * ':' and combining characters (see xmlRegCheckCharacterRange), which
     * look like punctuation and mark characters in Unicode terms. The
     * punctuation/mark exclusions in the \i and \c cases below are left
     * as they were but seem optimistic - confirm.
     */
    switch (type1) {
        case XML_REGEXP_ANYSPACE: /* \s */
            /* can't be a letter, number, mark, punctuation, symbol */
            if ((type2 == XML_REGEXP_NOTSPACE) ||
                ((type2 >= XML_REGEXP_LETTER) &&
                 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
                ((type2 >= XML_REGEXP_NUMBER) &&
                 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
                ((type2 >= XML_REGEXP_MARK) &&
                 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
                ((type2 >= XML_REGEXP_PUNCT) &&
                 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
                ((type2 >= XML_REGEXP_SYMBOL) &&
                 (type2 <= XML_REGEXP_SYMBOL_OTHERS))
                ) return(0);
            break;
        case XML_REGEXP_NOTSPACE: /* \S */
            break;
        case XML_REGEXP_INITNAME: /* \l */
            /* can't be a number, mark, separator, punctuation, symbol or other */
            if ((type2 == XML_REGEXP_NOTINITNAME) ||
                ((type2 >= XML_REGEXP_NUMBER) &&
                 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
                ((type2 >= XML_REGEXP_MARK) &&
                 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
                ((type2 >= XML_REGEXP_SEPAR) &&
                 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
                ((type2 >= XML_REGEXP_PUNCT) &&
                 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
                ((type2 >= XML_REGEXP_SYMBOL) &&
                 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
                ((type2 >= XML_REGEXP_OTHER) &&
                 (type2 <= XML_REGEXP_OTHER_NA))
                ) return(0);
            break;
        case XML_REGEXP_NOTINITNAME: /* \L */
            break;
        case XML_REGEXP_NAMECHAR: /* \c */
            /* can't be a mark, separator, punctuation, symbol or other */
            if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
                ((type2 >= XML_REGEXP_MARK) &&
                 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
                ((type2 >= XML_REGEXP_PUNCT) &&
                 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
                ((type2 >= XML_REGEXP_SEPAR) &&
                 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
                ((type2 >= XML_REGEXP_SYMBOL) &&
                 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
                ((type2 >= XML_REGEXP_OTHER) &&
                 (type2 <= XML_REGEXP_OTHER_NA))
                ) return(0);
            break;
        case XML_REGEXP_NOTNAMECHAR: /* \C */
            break;
        case XML_REGEXP_DECIMAL: /* \d */
            /*
             * can't be a letter, mark, separator, punctuation, symbol or
             * other. As \W only accepts punctuation, separators and
             * others it can't be \W either. On the contrary \w accepts
             * decimal digits, so \d and \w do intersect and must not be
             * listed here.
             */
            if ((type2 == XML_REGEXP_NOTDECIMAL) ||
                (type2 == XML_REGEXP_NOTREALCHAR) ||
                ((type2 >= XML_REGEXP_LETTER) &&
                 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
                ((type2 >= XML_REGEXP_MARK) &&
                 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
                ((type2 >= XML_REGEXP_PUNCT) &&
                 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
                ((type2 >= XML_REGEXP_SEPAR) &&
                 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
                ((type2 >= XML_REGEXP_SYMBOL) &&
                 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
                ((type2 >= XML_REGEXP_OTHER) &&
                 (type2 <= XML_REGEXP_OTHER_NA))
                ) return(0);
            break;
        case XML_REGEXP_NOTDECIMAL: /* \D */
            break;
        case XML_REGEXP_REALCHAR: /* \w */
            /*
             * \w is everything but punctuation, separators and others,
             * hence it can't be one of those nor its complement \W.
             * Marks and symbols are accepted by \w and therefore do
             * intersect with it.
             */
            if ((type2 == XML_REGEXP_NOTREALCHAR) ||
                ((type2 >= XML_REGEXP_PUNCT) &&
                 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
                ((type2 >= XML_REGEXP_SEPAR) &&
                 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
                ((type2 >= XML_REGEXP_OTHER) &&
                 (type2 <= XML_REGEXP_OTHER_NA))
                ) return(0);
            break;
        case XML_REGEXP_NOTREALCHAR: /* \W */
            break;
        /*
         * at that point we know both type1 and type2 are from
         * character categories, are ordered and are different,
         * it becomes simple because this is a partition: a category
         * only intersects with its own subgroups
         */
        case XML_REGEXP_LETTER:
            if (type2 <= XML_REGEXP_LETTER_OTHERS)
                return(1);
            return(0);
        case XML_REGEXP_LETTER_UPPERCASE:
        case XML_REGEXP_LETTER_LOWERCASE:
        case XML_REGEXP_LETTER_TITLECASE:
        case XML_REGEXP_LETTER_MODIFIER:
        case XML_REGEXP_LETTER_OTHERS:
            return(0);
        case XML_REGEXP_MARK:
            if (type2 <= XML_REGEXP_MARK_ENCLOSING)
                return(1);
            return(0);
        case XML_REGEXP_MARK_NONSPACING:
        case XML_REGEXP_MARK_SPACECOMBINING:
        case XML_REGEXP_MARK_ENCLOSING:
            return(0);
        case XML_REGEXP_NUMBER:
            if (type2 <= XML_REGEXP_NUMBER_OTHERS)
                return(1);
            return(0);
        case XML_REGEXP_NUMBER_DECIMAL:
        case XML_REGEXP_NUMBER_LETTER:
        case XML_REGEXP_NUMBER_OTHERS:
            return(0);
        case XML_REGEXP_PUNCT:
            if (type2 <= XML_REGEXP_PUNCT_OTHERS)
                return(1);
            return(0);
        case XML_REGEXP_PUNCT_CONNECTOR:
        case XML_REGEXP_PUNCT_DASH:
        case XML_REGEXP_PUNCT_OPEN:
        case XML_REGEXP_PUNCT_CLOSE:
        case XML_REGEXP_PUNCT_INITQUOTE:
        case XML_REGEXP_PUNCT_FINQUOTE:
        case XML_REGEXP_PUNCT_OTHERS:
            return(0);
        case XML_REGEXP_SEPAR:
            if (type2 <= XML_REGEXP_SEPAR_PARA)
                return(1);
            return(0);
        case XML_REGEXP_SEPAR_SPACE:
        case XML_REGEXP_SEPAR_LINE:
        case XML_REGEXP_SEPAR_PARA:
            return(0);
        case XML_REGEXP_SYMBOL:
            if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
                return(1);
            return(0);
        case XML_REGEXP_SYMBOL_MATH:
        case XML_REGEXP_SYMBOL_CURRENCY:
        case XML_REGEXP_SYMBOL_MODIFIER:
        case XML_REGEXP_SYMBOL_OTHERS:
            return(0);
        case XML_REGEXP_OTHER:
            if (type2 <= XML_REGEXP_OTHER_NA)
                return(1);
            return(0);
        case XML_REGEXP_OTHER_CONTROL:
        case XML_REGEXP_OTHER_FORMAT:
        case XML_REGEXP_OTHER_PRIVATE:
        case XML_REGEXP_OTHER_NA:
            return(0);
        default:
            break;
    }
    return(1);
}
/**
* xmlFAEqualAtoms :
* @ atom1 : an atom
* @ atom2 : an atom
2009-08-31 18:47:39 +04:00
* @ deep : if not set only compare string pointers
2006-02-12 22:14:15 +03:00
*
* Compares two atoms to check whether they are the same exactly
* this is used to remove equivalent transitions
*
* Returns 1 if same and 0 otherwise
*/
static int
2009-08-31 18:47:39 +04:00
xmlFAEqualAtoms ( xmlRegAtomPtr atom1 , xmlRegAtomPtr atom2 , int deep ) {
2006-02-12 22:14:15 +03:00
int ret = 0 ;
if ( atom1 = = atom2 )
return ( 1 ) ;
if ( ( atom1 = = NULL ) | | ( atom2 = = NULL ) )
return ( 0 ) ;
if ( atom1 - > type ! = atom2 - > type )
return ( 0 ) ;
switch ( atom1 - > type ) {
case XML_REGEXP_EPSILON :
ret = 0 ;
break ;
case XML_REGEXP_STRING :
2009-08-31 18:47:39 +04:00
if ( ! deep )
ret = ( atom1 - > valuep = = atom2 - > valuep ) ;
else
ret = xmlStrEqual ( ( xmlChar * ) atom1 - > valuep ,
( xmlChar * ) atom2 - > valuep ) ;
2006-02-12 22:14:15 +03:00
break ;
case XML_REGEXP_CHARVAL :
ret = ( atom1 - > codepoint = = atom2 - > codepoint ) ;
break ;
case XML_REGEXP_RANGES :
/* too hard to do in the general case */
ret = 0 ;
default :
break ;
}
return ( ret ) ;
}
2002-04-22 20:01:24 +04:00
/**
* xmlFACompareAtoms :
* @ atom1 : an atom
* @ atom2 : an atom
2009-08-31 18:47:39 +04:00
* @ deep : if not set only compare string pointers
2002-04-22 20:01:24 +04:00
*
2005-10-18 23:11:55 +04:00
* Compares two atoms to check whether they intersect in some ways ,
2006-02-12 22:14:15 +03:00
* this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
2002-04-22 20:01:24 +04:00
*
* Returns 1 if yes and 0 otherwise
*/
static int
2009-08-31 18:47:39 +04:00
xmlFACompareAtoms ( xmlRegAtomPtr atom1 , xmlRegAtomPtr atom2 , int deep ) {
2006-02-12 22:14:15 +03:00
int ret = 1 ;
2005-07-19 18:33:55 +04:00
2002-04-22 20:01:24 +04:00
if ( atom1 = = atom2 )
return ( 1 ) ;
if ( ( atom1 = = NULL ) | | ( atom2 = = NULL ) )
return ( 0 ) ;
2006-02-12 22:14:15 +03:00
if ( ( atom1 - > type = = XML_REGEXP_ANYCHAR ) | |
( atom2 - > type = = XML_REGEXP_ANYCHAR ) )
return ( 1 ) ;
if ( atom1 - > type > atom2 - > type ) {
2005-10-18 23:11:55 +04:00
xmlRegAtomPtr tmp ;
tmp = atom1 ;
atom1 = atom2 ;
atom2 = tmp ;
2006-02-12 22:14:15 +03:00
}
if ( atom1 - > type ! = atom2 - > type ) {
ret = xmlFACompareAtomTypes ( atom1 - > type , atom2 - > type ) ;
/* if they can't intersect at the type level break now */
if ( ret = = 0 )
return ( 0 ) ;
2005-10-18 23:11:55 +04:00
}
2002-04-22 20:01:24 +04:00
switch ( atom1 - > type ) {
case XML_REGEXP_STRING :
2009-08-31 18:47:39 +04:00
if ( ! deep )
ret = ( atom1 - > valuep ! = atom2 - > valuep ) ;
2019-09-16 16:36:02 +03:00
else {
xmlChar * val1 = ( xmlChar * ) atom1 - > valuep ;
xmlChar * val2 = ( xmlChar * ) atom2 - > valuep ;
int compound1 = ( xmlStrchr ( val1 , ' | ' ) ! = NULL ) ;
int compound2 = ( xmlStrchr ( val2 , ' | ' ) ! = NULL ) ;
/* Ignore negative match flag for ##other namespaces */
if ( compound1 ! = compound2 )
return ( 0 ) ;
ret = xmlRegStrEqualWildcard ( val1 , val2 ) ;
}
2005-07-19 18:33:55 +04:00
break ;
2002-04-22 20:01:24 +04:00
case XML_REGEXP_EPSILON :
2005-10-18 23:11:55 +04:00
goto not_determinist ;
2002-04-22 20:01:24 +04:00
case XML_REGEXP_CHARVAL :
2005-10-18 23:11:55 +04:00
if ( atom2 - > type = = XML_REGEXP_CHARVAL ) {
2006-02-12 22:14:15 +03:00
ret = ( atom1 - > codepoint = = atom2 - > codepoint ) ;
2005-10-18 23:11:55 +04:00
} else {
2006-02-12 22:14:15 +03:00
ret = xmlRegCheckCharacter ( atom2 , atom1 - > codepoint ) ;
if ( ret < 0 )
ret = 1 ;
}
break ;
case XML_REGEXP_RANGES :
if ( atom2 - > type = = XML_REGEXP_RANGES ) {
2005-10-18 23:11:55 +04:00
int i , j , res ;
xmlRegRangePtr r1 , r2 ;
/*
* need to check that none of the ranges eventually matches
*/
for ( i = 0 ; i < atom1 - > nbRanges ; i + + ) {
for ( j = 0 ; j < atom2 - > nbRanges ; j + + ) {
r1 = atom1 - > ranges [ i ] ;
r2 = atom2 - > ranges [ j ] ;
res = xmlFACompareRanges ( r1 , r2 ) ;
if ( res = = 1 ) {
ret = 1 ;
goto done ;
}
}
}
ret = 0 ;
}
break ;
2002-04-22 20:01:24 +04:00
default :
2005-10-18 23:11:55 +04:00
goto not_determinist ;
2002-04-22 20:01:24 +04:00
}
2005-10-18 23:11:55 +04:00
done :
2005-08-09 15:09:52 +04:00
if ( atom1 - > neg ! = atom2 - > neg ) {
2005-07-19 18:33:55 +04:00
ret = ! ret ;
2005-08-09 15:09:52 +04:00
}
2005-10-18 23:11:55 +04:00
if ( ret = = 0 )
return ( 0 ) ;
not_determinist :
return ( 1 ) ;
2002-04-22 20:01:24 +04:00
}
/**
 * xmlFARecurseDeterminism:
 * @ctxt:  a regexp parser context
 * @state:  the state whose outgoing transitions are examined
 * @fromnr:  the number of the state owning the transition being checked
 * @tonr:  the target state number of the transition being checked
 * @atom:  the atom of the transition being checked
 *
 * Follows the atom-less transitions starting at @state and looks for
 * transitions whose atom intersects with @atom. Such transitions are
 * flagged as non-deterministic. The states traversed are marked as
 * visited, xmlFAFinishRecurseDeterminism() resets those marks.
 *
 * Returns 0 if a conflicting transition was found and 1 otherwise
 */
static int
xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
                        int fromnr, int tonr, xmlRegAtomPtr atom) {
    int determinist = 1;
    int deep;
    int idx, count;

    if (state == NULL)
        return(1);
    if (state->markd == XML_REGEXP_MARK_VISITED)
        return(1);

    /* automata built with AM_AUTOMATA_RNG compare strings by pointer */
    deep = (ctxt->flags & AM_AUTOMATA_RNG) ? 0 : 1;

    /*
     * the count is sampled once so that transitions potentially added
     * in the course of the elimination are not recursed on
     */
    count = state->nbTrans;
    for (idx = 0; idx < count; idx++) {
        xmlRegTransPtr tr = &state->trans[idx];

        /* cancelled, or leading back to the state we started from */
        if ((tr->to < 0) || (tr->to == fromnr))
            continue;

        if (tr->atom == NULL) {
            /* mark before descending so that loops terminate */
            state->markd = XML_REGEXP_MARK_VISITED;
            if (xmlFARecurseDeterminism(ctxt, ctxt->states[tr->to],
                                        fromnr, tonr, atom) == 0)
                determinist = 0;
            continue;
        }

        if (!xmlFACompareAtoms(tr->atom, atom, deep))
            continue;

        /* Treat equal transitions as deterministic. */
        if ((tr->to != tonr) || (!xmlFAEqualAtoms(tr->atom, atom, deep)))
            determinist = 0;
        /* mark the transition as non-deterministic */
        tr->nd = 1;
    }
    return(determinist);
}
2020-07-11 22:32:10 +03:00
/**
 * xmlFAFinishRecurseDeterminism:
 * @ctxt:  a regexp parser context
 * @state:  the state where the determinism check started
 *
 * Reset flags after checking determinism: clears the visited mark on
 * @state and, following the atom-less transitions, on the states which
 * still carry it.
 */
static void
xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
    int idx, count;

    /* unmarked states stop the walk */
    if ((state == NULL) || (state->markd != XML_REGEXP_MARK_VISITED))
        return;
    state->markd = 0;

    count = state->nbTrans;
    for (idx = 0; idx < count; idx++) {
        xmlRegTransPtr tr = &state->trans[idx];

        if (tr->atom != NULL)
            continue;
        if (tr->to < 0)
            continue;
        xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[tr->to]);
    }
}
2002-04-22 20:01:24 +04:00
/**
 * xmlFAComputesDeterminism:
 * @ctxt:  a regexp parser context
 *
 * Check whether the associated regexp is determinist,
 * should be called after xmlFAEliminateEpsilonTransitions().
 * The verdict is cached in @ctxt->determinist and reused by later calls.
 * As a side effect duplicated transitions are cancelled and the
 * conflicting ones get their nd field set.
 *
 * Returns 1 if the automata is determinist and 0 otherwise
 */
static int
xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
    xmlRegStatePtr state;
    xmlRegTransPtr cur, prev, lastNd;
    int s, k, p;
    int determinist = 1;
    int deep;

    if (ctxt->determinist != -1)
        return(ctxt->determinist);

    /* automata built with AM_AUTOMATA_RNG compare strings by pointer */
    deep = (ctxt->flags & AM_AUTOMATA_RNG) ? 0 : 1;

    /*
     * First cleanup the automata: when a transition duplicates an
     * earlier one of the same state (same target, atom, counter and
     * count) the earlier one is cancelled.
     */
    for (s = 0; s < ctxt->nbStates; s++) {
        state = ctxt->states[s];
        if ((state == NULL) || (state->nbTrans < 2))
            continue;
        for (k = 0; k < state->nbTrans; k++) {
            cur = &state->trans[k];
            /*
             * Determinism checks in case of counted or all transitions
             * will have to be handled separately
             */
            if (cur->atom == NULL)
                continue;
            if (cur->to < 0)            /* eliminated */
                continue;
            for (p = 0; p < k; p++) {
                prev = &state->trans[p];
                if ((prev->to < 0) || (prev->atom == NULL))
                    continue;
                if (cur->to != prev->to)
                    continue;
                /*
                 * deep is used there to keep the transitions which
                 * indicate a conflict
                 */
                if (xmlFAEqualAtoms(cur->atom, prev->atom, deep) &&
                    (cur->counter == prev->counter) &&
                    (cur->count == prev->count))
                    prev->to = -1;      /* eliminated */
            }
        }
    }

    /*
     * Then check for all states that there aren't 2 transitions
     * with the same atom and a different target. The scan is never cut
     * short so that all non deterministic transitions get marked down.
     */
    for (s = 0; s < ctxt->nbStates; s++) {
        state = ctxt->states[s];
        if ((state == NULL) || (state->nbTrans < 2))
            continue;
        lastNd = NULL;
        for (k = 0; k < state->nbTrans; k++) {
            cur = &state->trans[k];
            if (cur->atom == NULL)
                continue;
            if (cur->to < 0)            /* eliminated */
                continue;
            for (p = 0; p < k; p++) {
                prev = &state->trans[p];
                if (prev->to < 0)       /* eliminated */
                    continue;

                if (prev->atom == NULL) {
                    /*
                     * do the closure in case of remaining specific
                     * epsilon transitions like choices or all
                     */
                    int res = xmlFARecurseDeterminism(ctxt,
                                        ctxt->states[prev->to],
                                        s, cur->to, cur->atom);

                    xmlFAFinishRecurseDeterminism(ctxt,
                                        ctxt->states[prev->to]);
                    if (res == 0) {
                        cur->nd = 1;
                        lastNd = cur;
                        determinist = 0;
                    }
                    continue;
                }

                /* the conflict detection always compares deeply */
                if (!xmlFACompareAtoms(cur->atom, prev->atom, 1))
                    continue;
                /*
                 * The pair is only tolerated when both transitions have
                 * the same target, equal atoms and distinct counters
                 * (equal counter transitions that couldn't be eliminated).
                 */
                if ((cur->to != prev->to) ||
                    (cur->counter == prev->counter) ||
                    (!xmlFAEqualAtoms(cur->atom, prev->atom, deep)))
                    determinist = 0;
                /* mark the transitions as non-deterministic ones */
                cur->nd = 1;
                prev->nd = 1;
                lastNd = cur;
            }
        }
        /*
         * mark specifically the last non-deterministic transition
         * from a state since there is no need to set-up rollback
         * from it
         */
        if (lastNd != NULL)
            lastNd->nd = 2;
    }

    ctxt->determinist = determinist;
    return(determinist);
}
2002-04-16 19:50:10 +04:00
/************************************************************************
 *                                                                      *
 *        Routines to check input against transition atoms             *
 *                                                                      *
 ************************************************************************/
/**
 * xmlRegCheckCharacterRange:
 * @type:  the type of the range or atom
 * @codepoint:  the codepoint to test
 * @neg:  if set the result of the test is negated
 * @start:  the first codepoint accepted, for XML_REGEXP_CHARVAL only
 * @end:  the last codepoint accepted, for XML_REGEXP_CHARVAL only
 * @blockName:  the block name, for XML_REGEXP_BLOCK_NAME only
 *
 * Tests a codepoint against a single character class.
 *
 * Returns -1 for the types which can't be tested that way (STRING,
 *         SUBREG, RANGES, EPSILON), otherwise the value of the test,
 *         logically negated if @neg is set
 */
static int
xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
                          int start, int end, const xmlChar *blockName) {
    int matched = 0;
    int invert = neg;

    /*
     * Each of those types is evaluated through the test of its
     * complement further down, so the negation is toggled first.
     */
    switch (type) {
        case XML_REGEXP_NOTSPACE:
        case XML_REGEXP_NOTINITNAME:
        case XML_REGEXP_NOTNAMECHAR:
        case XML_REGEXP_NOTDECIMAL:
        case XML_REGEXP_REALCHAR:
            invert = !invert;
            break;
        default:
            break;
    }

    switch (type) {
        case XML_REGEXP_EPSILON:
        case XML_REGEXP_RANGES:
        case XML_REGEXP_SUBREG:
        case XML_REGEXP_STRING:
            return(-1);

        case XML_REGEXP_ANYCHAR:
            matched = ((codepoint != '\n') && (codepoint != '\r'));
            break;
        case XML_REGEXP_CHARVAL:
            matched = ((codepoint >= start) && (codepoint <= end));
            break;

        case XML_REGEXP_NOTSPACE:
        case XML_REGEXP_ANYSPACE:
            matched = ((codepoint == '\n') || (codepoint == '\r') ||
                       (codepoint == '\t') || (codepoint == ' '));
            break;
        case XML_REGEXP_NOTINITNAME:
        case XML_REGEXP_INITNAME:
            matched = (IS_LETTER(codepoint) ||
                       (codepoint == '_') || (codepoint == ':'));
            break;
        case XML_REGEXP_NOTNAMECHAR:
        case XML_REGEXP_NAMECHAR:
            matched = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
                       (codepoint == '.') || (codepoint == '-') ||
                       (codepoint == '_') || (codepoint == ':') ||
                       IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
            break;
        case XML_REGEXP_NOTDECIMAL:
        case XML_REGEXP_DECIMAL:
            matched = xmlUCSIsCatNd(codepoint);
            break;
        case XML_REGEXP_REALCHAR:
        case XML_REGEXP_NOTREALCHAR:
            /* punctuation, separator or other */
            matched = xmlUCSIsCatP(codepoint);
            if (matched == 0)
                matched = xmlUCSIsCatZ(codepoint);
            if (matched == 0)
                matched = xmlUCSIsCatC(codepoint);
            break;

        case XML_REGEXP_LETTER:
            matched = xmlUCSIsCatL(codepoint);
            break;
        case XML_REGEXP_LETTER_UPPERCASE:
            matched = xmlUCSIsCatLu(codepoint);
            break;
        case XML_REGEXP_LETTER_LOWERCASE:
            matched = xmlUCSIsCatLl(codepoint);
            break;
        case XML_REGEXP_LETTER_TITLECASE:
            matched = xmlUCSIsCatLt(codepoint);
            break;
        case XML_REGEXP_LETTER_MODIFIER:
            matched = xmlUCSIsCatLm(codepoint);
            break;
        case XML_REGEXP_LETTER_OTHERS:
            matched = xmlUCSIsCatLo(codepoint);
            break;

        case XML_REGEXP_MARK:
            matched = xmlUCSIsCatM(codepoint);
            break;
        case XML_REGEXP_MARK_NONSPACING:
            matched = xmlUCSIsCatMn(codepoint);
            break;
        case XML_REGEXP_MARK_SPACECOMBINING:
            matched = xmlUCSIsCatMc(codepoint);
            break;
        case XML_REGEXP_MARK_ENCLOSING:
            matched = xmlUCSIsCatMe(codepoint);
            break;

        case XML_REGEXP_NUMBER:
            matched = xmlUCSIsCatN(codepoint);
            break;
        case XML_REGEXP_NUMBER_DECIMAL:
            matched = xmlUCSIsCatNd(codepoint);
            break;
        case XML_REGEXP_NUMBER_LETTER:
            matched = xmlUCSIsCatNl(codepoint);
            break;
        case XML_REGEXP_NUMBER_OTHERS:
            matched = xmlUCSIsCatNo(codepoint);
            break;

        case XML_REGEXP_PUNCT:
            matched = xmlUCSIsCatP(codepoint);
            break;
        case XML_REGEXP_PUNCT_CONNECTOR:
            matched = xmlUCSIsCatPc(codepoint);
            break;
        case XML_REGEXP_PUNCT_DASH:
            matched = xmlUCSIsCatPd(codepoint);
            break;
        case XML_REGEXP_PUNCT_OPEN:
            matched = xmlUCSIsCatPs(codepoint);
            break;
        case XML_REGEXP_PUNCT_CLOSE:
            matched = xmlUCSIsCatPe(codepoint);
            break;
        case XML_REGEXP_PUNCT_INITQUOTE:
            matched = xmlUCSIsCatPi(codepoint);
            break;
        case XML_REGEXP_PUNCT_FINQUOTE:
            matched = xmlUCSIsCatPf(codepoint);
            break;
        case XML_REGEXP_PUNCT_OTHERS:
            matched = xmlUCSIsCatPo(codepoint);
            break;

        case XML_REGEXP_SEPAR:
            matched = xmlUCSIsCatZ(codepoint);
            break;
        case XML_REGEXP_SEPAR_SPACE:
            matched = xmlUCSIsCatZs(codepoint);
            break;
        case XML_REGEXP_SEPAR_LINE:
            matched = xmlUCSIsCatZl(codepoint);
            break;
        case XML_REGEXP_SEPAR_PARA:
            matched = xmlUCSIsCatZp(codepoint);
            break;

        case XML_REGEXP_SYMBOL:
            matched = xmlUCSIsCatS(codepoint);
            break;
        case XML_REGEXP_SYMBOL_MATH:
            matched = xmlUCSIsCatSm(codepoint);
            break;
        case XML_REGEXP_SYMBOL_CURRENCY:
            matched = xmlUCSIsCatSc(codepoint);
            break;
        case XML_REGEXP_SYMBOL_MODIFIER:
            matched = xmlUCSIsCatSk(codepoint);
            break;
        case XML_REGEXP_SYMBOL_OTHERS:
            matched = xmlUCSIsCatSo(codepoint);
            break;

        case XML_REGEXP_OTHER:
            matched = xmlUCSIsCatC(codepoint);
            break;
        case XML_REGEXP_OTHER_CONTROL:
            matched = xmlUCSIsCatCc(codepoint);
            break;
        case XML_REGEXP_OTHER_FORMAT:
            matched = xmlUCSIsCatCf(codepoint);
            break;
        case XML_REGEXP_OTHER_PRIVATE:
            matched = xmlUCSIsCatCo(codepoint);
            break;
        case XML_REGEXP_OTHER_NA:
            /*
             * no xmlUCSIsCatCn() test here, it seems the category doesn't
             * exist anymore in recent Unicode releases: never matches
             */
            matched = 0;
            break;

        case XML_REGEXP_BLOCK_NAME:
            matched = xmlUCSIsBlock(codepoint, (const char *) blockName);
            break;
    }

    /* without negation the value of the test is handed back untouched */
    return(invert ? !matched : matched);
}
/*
 * Check one codepoint against an atom.
 *
 * Returns -1 when the atom is NULL, the codepoint fails IS_CHAR, or the
 * atom type cannot be evaluated here (sub-expression, epsilon, string).
 * Otherwise returns the result of the comparison: 0 when the codepoint is
 * rejected, non-zero when it is accepted.
 */
static int
xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
    int idx, hit;
    int accepted = 0;
    xmlRegRangePtr cur;

    if (atom == NULL)
        return(-1);
    if (!IS_CHAR(codepoint))
        return(-1);

    switch (atom->type) {
        case XML_REGEXP_SUBREG:
        case XML_REGEXP_EPSILON:
            return(-1);
        case XML_REGEXP_CHARVAL:
            return(codepoint == atom->codepoint);
        case XML_REGEXP_RANGES:
            for (idx = 0; idx < atom->nbRanges; idx++) {
                cur = atom->ranges[idx];
                /* every kind of range is tested the same way ... */
                hit = xmlRegCheckCharacterRange(cur->type, codepoint,
                                                0, cur->start, cur->end,
                                                cur->blockName);
                /* ... only the interpretation of the result differs */
                if (cur->neg != 0) {
                    if (hit != 0)
                        return(0); /* excluded char */
                    if (cur->neg != 2)
                        accepted = 1;
                } else if (hit != 0) {
                    accepted = 1; /* might still be excluded */
                }
            }
            return(accepted);
        case XML_REGEXP_STRING:
            printf(" TODO: XML_REGEXP_STRING \n ");
            return(-1);
        case XML_REGEXP_ANYCHAR:
        case XML_REGEXP_ANYSPACE:
        case XML_REGEXP_NOTSPACE:
        case XML_REGEXP_INITNAME:
        case XML_REGEXP_NOTINITNAME:
        case XML_REGEXP_NAMECHAR:
        case XML_REGEXP_NOTNAMECHAR:
        case XML_REGEXP_DECIMAL:
        case XML_REGEXP_NOTDECIMAL:
        case XML_REGEXP_REALCHAR:
        case XML_REGEXP_NOTREALCHAR:
        case XML_REGEXP_LETTER:
        case XML_REGEXP_LETTER_UPPERCASE:
        case XML_REGEXP_LETTER_LOWERCASE:
        case XML_REGEXP_LETTER_TITLECASE:
        case XML_REGEXP_LETTER_MODIFIER:
        case XML_REGEXP_LETTER_OTHERS:
        case XML_REGEXP_MARK:
        case XML_REGEXP_MARK_NONSPACING:
        case XML_REGEXP_MARK_SPACECOMBINING:
        case XML_REGEXP_MARK_ENCLOSING:
        case XML_REGEXP_NUMBER:
        case XML_REGEXP_NUMBER_DECIMAL:
        case XML_REGEXP_NUMBER_LETTER:
        case XML_REGEXP_NUMBER_OTHERS:
        case XML_REGEXP_PUNCT:
        case XML_REGEXP_PUNCT_CONNECTOR:
        case XML_REGEXP_PUNCT_DASH:
        case XML_REGEXP_PUNCT_OPEN:
        case XML_REGEXP_PUNCT_CLOSE:
        case XML_REGEXP_PUNCT_INITQUOTE:
        case XML_REGEXP_PUNCT_FINQUOTE:
        case XML_REGEXP_PUNCT_OTHERS:
        case XML_REGEXP_SEPAR:
        case XML_REGEXP_SEPAR_SPACE:
        case XML_REGEXP_SEPAR_LINE:
        case XML_REGEXP_SEPAR_PARA:
        case XML_REGEXP_SYMBOL:
        case XML_REGEXP_SYMBOL_MATH:
        case XML_REGEXP_SYMBOL_CURRENCY:
        case XML_REGEXP_SYMBOL_MODIFIER:
        case XML_REGEXP_SYMBOL_OTHERS:
        case XML_REGEXP_OTHER:
        case XML_REGEXP_OTHER_CONTROL:
        case XML_REGEXP_OTHER_FORMAT:
        case XML_REGEXP_OTHER_PRIVATE:
        case XML_REGEXP_OTHER_NA:
        case XML_REGEXP_BLOCK_NAME:
            hit = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
                                            (const xmlChar *) atom->valuep);
            if (atom->neg)
                hit = !hit;
            return(hit);
    }
    /* atom types not listed above never match */
    return(0);
}
/************************************************************************
2012-09-11 09:26:36 +04:00
* *
2004-05-06 08:17:26 +04:00
* Saving and restoring state of an execution context *
2012-09-11 09:26:36 +04:00
* *
2002-04-16 19:50:10 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Push the current execution point on the rollback stack: the state, the
 * input index, the next transition to try and, when the regexp has
 * counters, a copy of the counter values. The stack is created on first
 * use and doubled when full. On failure exec->status is set and nothing
 * is pushed.
 */
static void
xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
    xmlRegExecRollback *slot;

#ifdef MAX_PUSH
    if (exec->nbPush > MAX_PUSH) {
        exec->status = XML_REGEXP_INTERNAL_LIMIT;
        return;
    }
    exec->nbPush++;
#endif

    if (exec->maxRollbacks == 0) {
        /* first save: create a small zeroed stack */
        exec->maxRollbacks = 4;
        exec->rollbacks = (xmlRegExecRollback *)
            xmlMalloc(exec->maxRollbacks * sizeof(xmlRegExecRollback));
        if (exec->rollbacks == NULL) {
            exec->maxRollbacks = 0;
            exec->status = XML_REGEXP_OUT_OF_MEMORY;
            return;
        }
        memset(exec->rollbacks, 0,
               exec->maxRollbacks * sizeof(xmlRegExecRollback));
    } else if (exec->nbRollbacks >= exec->maxRollbacks) {
        /* stack full: double it, the capacity only changes on success */
        int oldMax = exec->maxRollbacks;
        int newMax = oldMax * 2;
        xmlRegExecRollback *grown;

        grown = (xmlRegExecRollback *)
            xmlRealloc(exec->rollbacks, newMax * sizeof(xmlRegExecRollback));
        if (grown == NULL) {
            exec->status = XML_REGEXP_OUT_OF_MEMORY;
            return;
        }
        exec->rollbacks = grown;
        exec->maxRollbacks = newMax;
        /* the new upper part must be zeroed so that .counts starts NULL */
        memset(&grown[oldMax], 0,
               (newMax - oldMax) * sizeof(xmlRegExecRollback));
    }

    slot = &exec->rollbacks[exec->nbRollbacks];
    slot->state = exec->state;
    slot->index = exec->index;
    slot->nextbranch = exec->transno + 1;

    if (exec->comp->nbCounters > 0) {
        /* the per-slot counter buffer is allocated once and then reused */
        if (slot->counts == NULL) {
            slot->counts = (int *)
                xmlMalloc(exec->comp->nbCounters * sizeof(int));
            if (slot->counts == NULL) {
                exec->status = XML_REGEXP_OUT_OF_MEMORY;
                return;
            }
        }
        memcpy(slot->counts, exec->counts,
               exec->comp->nbCounters * sizeof(int));
    }
    exec->nbRollbacks++;
}
/*
 * Pop the most recent entry of the rollback stack and restore the state,
 * input index, transition number and (if any) counters from it.
 * Does nothing when exec->status already reports a problem; sets the
 * status to XML_REGEXP_NOT_FOUND when the stack is empty.
 */
static void
xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
    xmlRegExecRollback *top;

    if (exec->status != XML_REGEXP_OK)
        return;
    if (exec->nbRollbacks <= 0) {
        exec->status = XML_REGEXP_NOT_FOUND;
        return;
    }

    exec->nbRollbacks--;
    top = &exec->rollbacks[exec->nbRollbacks];
    exec->state = top->state;
    exec->index = top->index;
    exec->transno = top->nextbranch;

    if (exec->comp->nbCounters <= 0)
        return;

    /* with counters in use every saved entry must carry a snapshot */
    if (top->counts == NULL) {
        fprintf(stderr, " exec save: allocation failed ");
        exec->status = XML_REGEXP_INTERNAL_ERROR;
        return;
    }
    if (exec->counts) {
        memcpy(exec->counts, top->counts,
               exec->comp->nbCounters * sizeof(int));
    }
}
/************************************************************************
2012-09-11 09:26:36 +04:00
* *
2004-05-06 08:17:26 +04:00
* Verifier , running an input against a compiled regexp *
2012-09-11 09:26:36 +04:00
* *
2002-04-16 19:50:10 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Run the automaton of @comp over the zero terminated string @content,
 * decoding it with xmlGetUTF8Char and backtracking through the rollback
 * stack maintained by xmlFARegExecSave()/xmlFARegExecRollBack().
 *
 * Returns 1 when the run ends with status XML_REGEXP_OK, 0 when it ends
 * with XML_REGEXP_NOT_FOUND, and otherwise the error status
 * (XML_REGEXP_OUT_OF_MEMORY, XML_REGEXP_INVALID_UTF8,
 * XML_REGEXP_INTERNAL_ERROR, ...).
 */
static int
xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
    xmlRegExecCtxt execval;
    xmlRegExecCtxtPtr exec = &execval;
    int ret, codepoint = 0, len, deter;

    exec->inputString = content;
    exec->index = 0;
    exec->nbPush = 0;
    exec->determinist = 1;
    exec->maxRollbacks = 0;
    exec->nbRollbacks = 0;
    exec->rollbacks = NULL;
    exec->status = XML_REGEXP_OK;
    exec->comp = comp;
    exec->state = comp->states[0];
    exec->transno = 0;
    exec->transcount = 0;
    exec->inputStack = NULL;
    exec->inputStackMax = 0;
    if (comp->nbCounters > 0) {
        exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
        if (exec->counts == NULL) {
            return(XML_REGEXP_OUT_OF_MEMORY);
        }
        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
    } else
        exec->counts = NULL;
    while ((exec->status == XML_REGEXP_OK) && (exec->state != NULL) &&
           ((exec->inputString[exec->index] != 0) ||
            ((exec->state != NULL) &&
             (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
        xmlRegTransPtr trans;
        xmlRegAtomPtr atom;

        /*
         * If end of input on non-terminal state, rollback, however we may
         * still have epsilon like transition for counted transitions
         * on counters, in that case don't break too early.  Additionally,
         * if we are working on a range like "AB{0,2}", where B is not present,
         * we don't want to break.
         */
        len = 1;
        if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
            /*
             * if there is a transition, we must check if
             * atom allows minOccurs of 0
             */
            if (exec->transno < exec->state->nbTrans) {
                trans = &exec->state->trans[exec->transno];
                if (trans->to >= 0) {
                    atom = trans->atom;
                    if (!((atom->min == 0) && (atom->max > 0)))
                        goto rollback;
                }
            } else
                goto rollback;
        }

        exec->transcount = 0;
        for (; exec->transno < exec->state->nbTrans; exec->transno++) {
            trans = &exec->state->trans[exec->transno];
            if (trans->to < 0)
                continue;
            atom = trans->atom;
            ret = 0;
            deter = 1;
            if (trans->count >= 0) {
                int count;
                xmlRegCounterPtr counter;

                if (exec->counts == NULL) {
                    exec->status = XML_REGEXP_INTERNAL_ERROR;
                    goto error;
                }
                /*
                 * A counted transition.
                 */

                count = exec->counts[trans->count];
                counter = &exec->comp->counters[trans->count];
                ret = ((count >= counter->min) && (count <= counter->max));
                if ((ret) && (counter->min != counter->max))
                    deter = 0;
            } else if (atom == NULL) {
                fprintf(stderr, " epsilon transition left at runtime \n ");
                exec->status = XML_REGEXP_INTERNAL_ERROR;
                break;
            } else if (exec->inputString[exec->index] != 0) {
                len = 4;
                codepoint = xmlGetUTF8Char(&exec->inputString[exec->index],
                                           &len);
                if (codepoint < 0) {
                    exec->status = XML_REGEXP_INVALID_UTF8;
                    goto error;
                }
                ret = xmlRegCheckCharacter(atom, codepoint);
                if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
                    xmlRegStatePtr to = comp->states[trans->to];

                    /*
                     * this is a multiple input sequence
                     * If there is a counter associated increment it now.
                     * do not increment if the counter is already over the
                     * maximum limit in which case get to next transition
                     */
                    if (trans->counter >= 0) {
                        xmlRegCounterPtr counter;

                        if ((exec->counts == NULL) ||
                            (exec->comp == NULL) ||
                            (exec->comp->counters == NULL)) {
                            exec->status = XML_REGEXP_INTERNAL_ERROR;
                            goto error;
                        }
                        counter = &exec->comp->counters[trans->counter];
                        if (exec->counts[trans->counter] >= counter->max)
                            continue; /* for loop on transitions */
                    }
                    /* Save before incrementing */
                    if (exec->state->nbTrans > exec->transno + 1) {
                        xmlFARegExecSave(exec);
                        if (exec->status != XML_REGEXP_OK)
                            goto error;
                    }
                    if (trans->counter >= 0) {
                        exec->counts[trans->counter]++;
                    }
                    exec->transcount = 1;
                    do {
                        /*
                         * Try to progress as much as possible on the input
                         */
                        if (exec->transcount == atom->max) {
                            break;
                        }
                        exec->index += len;
                        /*
                         * End of input: stop here
                         */
                        if (exec->inputString[exec->index] == 0) {
                            exec->index -= len;
                            break;
                        }
                        if (exec->transcount >= atom->min) {
                            int transno = exec->transno;
                            xmlRegStatePtr state = exec->state;

                            /*
                             * The transition is acceptable save it
                             */
                            exec->transno = -1; /* trick */
                            exec->state = to;
                            xmlFARegExecSave(exec);
                            if (exec->status != XML_REGEXP_OK)
                                goto error;
                            exec->transno = transno;
                            exec->state = state;
                        }
                        len = 4;
                        codepoint = xmlGetUTF8Char(
                                &exec->inputString[exec->index], &len);
                        if (codepoint < 0) {
                            exec->status = XML_REGEXP_INVALID_UTF8;
                            goto error;
                        }
                        ret = xmlRegCheckCharacter(atom, codepoint);
                        exec->transcount++;
                    } while (ret == 1);
                    if (exec->transcount < atom->min)
                        ret = 0;

                    /*
                     * If the last check failed but one transition was found
                     * possible, rollback
                     */
                    if (ret < 0)
                        ret = 0;
                    if (ret == 0) {
                        goto rollback;
                    }
                    if (trans->counter >= 0) {
                        if (exec->counts == NULL) {
                            exec->status = XML_REGEXP_INTERNAL_ERROR;
                            goto error;
                        }
                        exec->counts[trans->counter]--;
                    }
                } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
                    /*
                     * we don't match on the codepoint, but minOccurs of 0
                     * says that's ok.  Setting len to 0 inhibits stepping
                     * over the codepoint.
                     */
                    exec->transcount = 1;
                    len = 0;
                    ret = 1;
                }
            } else if ((atom->min == 0) && (atom->max > 0)) {
                /* another spot to match when minOccurs is 0 */
                exec->transcount = 1;
                len = 0;
                ret = 1;
            }
            if (ret == 1) {
                if ((trans->nd == 1) ||
                    ((trans->count >= 0) && (deter == 0) &&
                     (exec->state->nbTrans > exec->transno + 1))) {
                    xmlFARegExecSave(exec);
                    if (exec->status != XML_REGEXP_OK)
                        goto error;
                }
                if (trans->counter >= 0) {
                    xmlRegCounterPtr counter;

                    /* make sure we don't go over the counter maximum value */
                    if ((exec->counts == NULL) ||
                        (exec->comp == NULL) ||
                        (exec->comp->counters == NULL)) {
                        exec->status = XML_REGEXP_INTERNAL_ERROR;
                        goto error;
                    }
                    counter = &exec->comp->counters[trans->counter];
                    if (exec->counts[trans->counter] >= counter->max)
                        continue; /* for loop on transitions */
                    exec->counts[trans->counter]++;
                }
                if ((trans->count >= 0) &&
                    (trans->count < REGEXP_ALL_COUNTER)) {
                    if (exec->counts == NULL) {
                        exec->status = XML_REGEXP_INTERNAL_ERROR;
                        goto error;
                    }
                    exec->counts[trans->count] = 0;
                }
                exec->state = comp->states[trans->to];
                exec->transno = 0;
                if (trans->atom != NULL) {
                    exec->index += len;
                }
                goto progress;
            } else if (ret < 0) {
                exec->status = XML_REGEXP_INTERNAL_ERROR;
                break;
            }
        }
        if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
rollback:
            /*
             * Failed to find a way out
             */
            exec->determinist = 0;
            xmlFARegExecRollBack(exec);
        }
progress:
        continue;
    }
error:
    if (exec->rollbacks != NULL) {
        if (exec->counts != NULL) {
            int i;

            for (i = 0; i < exec->maxRollbacks; i++)
                if (exec->rollbacks[i].counts != NULL)
                    xmlFree(exec->rollbacks[i].counts);
        }
        xmlFree(exec->rollbacks);
    }
    /*
     * Release the counters before any return below: returning on a NULL
     * state first would leak them.
     */
    if (exec->counts != NULL)
        xmlFree(exec->counts);
    if (exec->state == NULL)
        return(XML_REGEXP_INTERNAL_ERROR);
    if (exec->status == XML_REGEXP_OK)
        return(1);
    if (exec->status == XML_REGEXP_NOT_FOUND)
        return(0);
    return(exec->status);
}
/************************************************************************
2012-09-11 09:26:36 +04:00
* *
2004-05-06 08:17:26 +04:00
* Progressive interface to the verifier one atom at a time *
2012-09-11 09:26:36 +04:00
* *
2002-04-16 19:50:10 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * xmlRegNewExecCtxt:
 * @comp: a precompiled regular expression
 * @callback: a callback function used for handling progresses in the
 *            automata matching phase
 * @data: the context data associated to the callback in this context
 *
 * Build a context used for progressive evaluation of a regexp.
 *
 * Returns the new context, or NULL if @comp is NULL, holds neither a
 * compact table nor states, or an allocation fails.
 */
xmlRegExecCtxtPtr
xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
    xmlRegExecCtxtPtr ctx;

    if ((comp == NULL) ||
        ((comp->compact == NULL) && (comp->states == NULL)))
        return(NULL);

    ctx = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
    if (ctx == NULL)
        return(NULL);
    memset(ctx, 0, sizeof(xmlRegExecCtxt));

    /* caller supplied parts */
    ctx->comp = comp;
    ctx->callback = callback;
    ctx->data = data;

    /* matching position */
    ctx->status = XML_REGEXP_OK;
    ctx->determinist = 1;
    ctx->inputString = NULL;
    ctx->index = 0;
    ctx->transno = 0;
    ctx->transcount = 0;
    if (comp->compact == NULL)
        ctx->state = comp->states[0];

    /* rollback stack, created lazily */
    ctx->rollbacks = NULL;
    ctx->maxRollbacks = 0;
    ctx->nbRollbacks = 0;
    ctx->nbPush = 0;

    if (comp->nbCounters > 0) {
        /*
         * For error handling, the counter array is allocated twice the
         * size; the second half is used to store the data in case of
         * rollback.
         */
        ctx->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int) * 2);
        if (ctx->counts == NULL) {
            xmlFree(ctx);
            return(NULL);
        }
        memset(ctx->counts, 0, comp->nbCounters * sizeof(int) * 2);
        ctx->errCounts = &ctx->counts[comp->nbCounters];
    } else {
        ctx->counts = NULL;
        ctx->errCounts = NULL;
    }

    /* pending input tokens */
    ctx->inputStack = NULL;
    ctx->inputStackMax = 0;
    ctx->inputStackNr = 0;

    /* error reporting */
    ctx->errStateNo = -1;
    ctx->errString = NULL;

    return(ctx);
}
/**
 * xmlRegFreeExecCtxt:
 * @exec: a regular expression evaluation context
 *
 * Free the structures associated to a regular expression evaluation
 * context: the rollback stack with its counter snapshots, the counters,
 * the stacked input tokens, the error string and the context itself.
 * A NULL @exec is ignored.
 */
void
xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
    int pos;

    if (exec == NULL)
        return;

    if (exec->rollbacks != NULL) {
        /* counter snapshots are only released when counters are in use */
        if (exec->counts != NULL) {
            pos = 0;
            while (pos < exec->maxRollbacks) {
                if (exec->rollbacks[pos].counts != NULL)
                    xmlFree(exec->rollbacks[pos].counts);
                pos++;
            }
        }
        xmlFree(exec->rollbacks);
    }

    if (exec->counts != NULL)
        xmlFree(exec->counts);

    if (exec->inputStack != NULL) {
        pos = 0;
        while (pos < exec->inputStackNr) {
            if (exec->inputStack[pos].value != NULL)
                xmlFree(exec->inputStack[pos].value);
            pos++;
        }
        xmlFree(exec->inputStack);
    }

    if (exec->errString != NULL)
        xmlFree(exec->errString);

    xmlFree(exec);
}
2024-05-05 21:20:22 +03:00
static int
2023-12-10 19:09:45 +03:00
xmlRegExecSetErrString ( xmlRegExecCtxtPtr exec , const xmlChar * value ) {
if ( exec - > errString ! = NULL )
xmlFree ( exec - > errString ) ;
if ( value = = NULL ) {
exec - > errString = NULL ;
} else {
exec - > errString = xmlStrdup ( value ) ;
2024-05-05 21:20:22 +03:00
if ( exec - > errString = = NULL ) {
2023-12-10 19:09:45 +03:00
exec - > status = XML_REGEXP_OUT_OF_MEMORY ;
2024-05-05 21:20:22 +03:00
return ( - 1 ) ;
}
2023-12-10 19:09:45 +03:00
}
2024-05-05 21:20:22 +03:00
return ( 0 ) ;
2023-12-10 19:09:45 +03:00
}
2002-04-16 19:50:10 +04:00
/*
 * Append a copy of the token @value and its @data to the input stack of
 * @exec, creating or doubling the stack when needed. The entry following
 * the last token is always reset so that it acts as a terminator.
 * On allocation failure exec->status is set to XML_REGEXP_OUT_OF_MEMORY
 * and the token is not added.
 */
static void
xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
                            void *data) {
    xmlRegInputTokenPtr slot;

    if (exec->inputStackMax == 0) {
        exec->inputStackMax = 4;
        exec->inputStack = (xmlRegInputTokenPtr)
            xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
        if (exec->inputStack == NULL) {
            exec->inputStackMax = 0;
            exec->status = XML_REGEXP_OUT_OF_MEMORY;
            return;
        }
    } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
        /* room is needed for the token and for the terminator entry */
        int doubled = exec->inputStackMax * 2;
        xmlRegInputTokenPtr grown;

        grown = (xmlRegInputTokenPtr)
            xmlRealloc(exec->inputStack, doubled * sizeof(xmlRegInputToken));
        if (grown == NULL) {
            exec->status = XML_REGEXP_OUT_OF_MEMORY;
            return;
        }
        exec->inputStack = grown;
        exec->inputStackMax = doubled;
    }

    slot = &exec->inputStack[exec->inputStackNr];
    if (value != NULL) {
        slot->value = xmlStrdup(value);
        if (slot->value == NULL) {
            exec->status = XML_REGEXP_OUT_OF_MEMORY;
            return;
        }
    } else {
        slot->value = NULL;
    }
    slot->data = data;
    exec->inputStackNr++;

    /* terminator entry */
    slot = &exec->inputStack[exec->inputStackNr];
    slot->value = NULL;
    slot->data = NULL;
}
2004-08-10 18:17:33 +04:00
/**
* xmlRegStrEqualWildcard :
2012-09-11 09:26:36 +04:00
* @ expStr : the string to be evaluated
2004-08-10 18:17:33 +04:00
* @ valStr : the validation string
*
* Checks if both strings are equal or have the same content . " * "
2019-09-30 18:04:54 +03:00
* can be used as a wildcard in @ valStr ; " | " is used as a separator of
2004-08-10 18:17:33 +04:00
* substrings in both @ expStr and @ valStr .
*
* Returns 1 if the comparison is satisfied and the number of substrings
* is equal , 0 otherwise .
*/
static int
xmlRegStrEqualWildcard ( const xmlChar * expStr , const xmlChar * valStr ) {
if ( expStr = = valStr ) return ( 1 ) ;
if ( expStr = = NULL ) return ( 0 ) ;
if ( valStr = = NULL ) return ( 0 ) ;
do {
/*
* Eval if we have a wildcard for the current item .
*/
if ( * expStr ! = * valStr ) {
2005-08-10 01:40:08 +04:00
/* if one of them starts with a wildcard make valStr be it */
if ( * valStr = = ' * ' ) {
const xmlChar * tmp ;
tmp = valStr ;
valStr = expStr ;
expStr = tmp ;
}
2004-08-10 18:17:33 +04:00
if ( ( * valStr ! = 0 ) & & ( * expStr ! = 0 ) & & ( * expStr + + = = ' * ' ) ) {
do {
if ( * valStr = = XML_REG_STRING_SEPARATOR )
break ;
2005-04-19 19:02:20 +04:00
valStr + + ;
2004-08-10 18:17:33 +04:00
} while ( * valStr ! = 0 ) ;
continue ;
} else
return ( 0 ) ;
}
2005-04-19 19:02:20 +04:00
expStr + + ;
valStr + + ;
2004-08-10 18:17:33 +04:00
} while ( * valStr ! = 0 ) ;
if ( * expStr ! = 0 )
return ( 0 ) ;
else
return ( 1 ) ;
}
2002-04-16 19:50:10 +04:00
2002-09-19 23:56:43 +04:00
/**
 * xmlRegCompactPushString:
 * @exec: a regexp execution context
 * @comp: the precompiled exec with a compact table
 * @value: a string token input
 * @data: data associated to the token to reuse in callbacks
 *
 * Push one input token in the execution context, using the compact
 * transition table of @comp. A NULL @value only asks whether the
 * current state is final.
 *
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 *     a negative value in case of error.
 */
static int
xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
                        xmlRegexpPtr comp,
                        const xmlChar *value,
                        void *data) {
    int state = exec->index;
    int col, dest, rowStart, destRow;

    if (comp == NULL)
        return(-1);
    if ((comp->compact == NULL) || (comp->stringMap == NULL))
        return(-1);

    /* each row holds the state type followed by one target per string */
    rowStart = state * (comp->nbstrings + 1);

    if (value == NULL) {
        /*
         * are we at a final state ?
         */
        return((comp->compact[rowStart] == XML_REGEXP_FINAL_STATE) ? 1 : 0);
    }

    /*
     * Examine all outside transitions from current state
     */
    for (col = 0; col < comp->nbstrings; col++) {
        dest = comp->compact[rowStart + col + 1];
        if ((dest <= 0) || (dest > comp->nbstates))
            continue;
        dest--; /* to avoid 0 */
        if (!xmlRegStrEqualWildcard(comp->stringMap[col], value))
            continue;

        exec->index = dest;
        if ((exec->callback != NULL) && (comp->transdata != NULL)) {
            exec->callback(exec->data, value,
                  comp->transdata[state * comp->nbstrings + col], data);
        }
        destRow = dest * (comp->nbstrings + 1);
        if (comp->compact[destRow] == XML_REGEXP_SINK_STATE)
            break; /* handled as a failure below */
        if (comp->compact[destRow] == XML_REGEXP_FINAL_STATE)
            return(1);
        return(0);
    }

    /*
     * Failed to find an exit transition out from current state for the
     * current token
     */
    exec->errStateNo = state;
    exec->status = XML_REGEXP_NOT_FOUND;
    xmlRegExecSetErrString(exec, value);
    return(exec->status);
}
2002-04-16 19:50:10 +04:00
/**
2005-08-09 15:09:52 +04:00
* xmlRegExecPushStringInternal :
2002-12-20 03:16:24 +03:00
* @ exec : a regexp execution context or NULL to indicate the end
2002-04-16 19:50:10 +04:00
* @ value : a string token input
* @ data : data associated to the token to reuse in callbacks
2005-08-09 15:09:52 +04:00
* @ compound : value was assembled from 2 strings
2002-04-16 19:50:10 +04:00
*
* Push one input token in the execution context
*
* Returns : 1 if the regexp reached a final state , 0 if non - final , and
* a negative value in case of error .
*/
2005-08-09 15:09:52 +04:00
static int
xmlRegExecPushStringInternal ( xmlRegExecCtxtPtr exec , const xmlChar * value ,
void * data , int compound ) {
2002-04-16 19:50:10 +04:00
xmlRegTransPtr trans ;
xmlRegAtomPtr atom ;
int ret ;
int final = 0 ;
2005-01-09 01:05:09 +03:00
int progress = 1 ;
2002-04-16 19:50:10 +04:00
if ( exec = = NULL )
return ( - 1 ) ;
2002-09-19 23:56:43 +04:00
if ( exec - > comp = = NULL )
return ( - 1 ) ;
2023-09-22 16:25:40 +03:00
if ( exec - > status ! = XML_REGEXP_OK )
2002-04-16 19:50:10 +04:00
return ( exec - > status ) ;
2002-09-19 23:56:43 +04:00
if ( exec - > comp - > compact ! = NULL )
return ( xmlRegCompactPushString ( exec , exec - > comp , value , data ) ) ;
2002-04-16 19:50:10 +04:00
if ( value = = NULL ) {
if ( exec - > state - > type = = XML_REGEXP_FINAL_STATE )
return ( 1 ) ;
final = 1 ;
}
/*
* If we have an active rollback stack push the new value there
* and get back to where we were left
*/
if ( ( value ! = NULL ) & & ( exec - > inputStackNr > 0 ) ) {
xmlFARegExecSaveInputString ( exec , value , data ) ;
value = exec - > inputStack [ exec - > index ] . value ;
data = exec - > inputStack [ exec - > index ] . data ;
}
2023-09-22 16:25:40 +03:00
while ( ( exec - > status = = XML_REGEXP_OK ) & &
2002-04-16 19:50:10 +04:00
( ( value ! = NULL ) | |
( ( final = = 1 ) & &
( exec - > state - > type ! = XML_REGEXP_FINAL_STATE ) ) ) ) {
/*
* End of input on non - terminal state , rollback , however we may
* still have epsilon like transition for counted transitions
* on counters , in that case don ' t break too early .
*/
2002-04-17 20:28:10 +04:00
if ( ( value = = NULL ) & & ( exec - > counts = = NULL ) )
2002-04-16 19:50:10 +04:00
goto rollback ;
exec - > transcount = 0 ;
for ( ; exec - > transno < exec - > state - > nbTrans ; exec - > transno + + ) {
trans = & exec - > state - > trans [ exec - > transno ] ;
if ( trans - > to < 0 )
continue ;
atom = trans - > atom ;
ret = 0 ;
2002-04-20 21:38:48 +04:00
if ( trans - > count = = REGEXP_ALL_LAX_COUNTER ) {
int i ;
int count ;
xmlRegTransPtr t ;
xmlRegCounterPtr counter ;
ret = 0 ;
/*
* Check all counted transitions from the current state
*/
if ( ( value = = NULL ) & & ( final ) ) {
ret = 1 ;
} else if ( value ! = NULL ) {
for ( i = 0 ; i < exec - > state - > nbTrans ; i + + ) {
t = & exec - > state - > trans [ i ] ;
if ( ( t - > counter < 0 ) | | ( t = = trans ) )
continue ;
counter = & exec - > comp - > counters [ t - > counter ] ;
count = exec - > counts [ t - > counter ] ;
2012-09-11 09:26:36 +04:00
if ( ( count < counter - > max ) & &
2002-04-20 21:38:48 +04:00
( t - > atom ! = NULL ) & &
( xmlStrEqual ( value , t - > atom - > valuep ) ) ) {
ret = 0 ;
break ;
}
if ( ( count > = counter - > min ) & &
( count < counter - > max ) & &
2006-03-10 03:36:23 +03:00
( t - > atom ! = NULL ) & &
2002-04-20 21:38:48 +04:00
( xmlStrEqual ( value , t - > atom - > valuep ) ) ) {
ret = 1 ;
break ;
}
}
}
} else if ( trans - > count = = REGEXP_ALL_COUNTER ) {
2002-04-20 11:24:11 +04:00
int i ;
int count ;
xmlRegTransPtr t ;
xmlRegCounterPtr counter ;
ret = 1 ;
/*
* Check all counted transitions from the current state
*/
for ( i = 0 ; i < exec - > state - > nbTrans ; i + + ) {
t = & exec - > state - > trans [ i ] ;
if ( ( t - > counter < 0 ) | | ( t = = trans ) )
continue ;
counter = & exec - > comp - > counters [ t - > counter ] ;
count = exec - > counts [ t - > counter ] ;
if ( ( count < counter - > min ) | | ( count > counter - > max ) ) {
ret = 0 ;
break ;
}
}
} else if ( trans - > count > = 0 ) {
2002-04-16 19:50:10 +04:00
int count ;
xmlRegCounterPtr counter ;
/*
* A counted transition .
*/
count = exec - > counts [ trans - > count ] ;
counter = & exec - > comp - > counters [ trans - > count ] ;
ret = ( ( count > = counter - > min ) & & ( count < = counter - > max ) ) ;
} else if ( atom = = NULL ) {
fprintf ( stderr , " epsilon transition left at runtime \n " ) ;
2023-09-22 16:25:40 +03:00
exec - > status = XML_REGEXP_INTERNAL_ERROR ;
2002-04-16 19:50:10 +04:00
break ;
} else if ( value ! = NULL ) {
2004-08-10 18:17:33 +04:00
ret = xmlRegStrEqualWildcard ( atom - > valuep , value ) ;
2005-08-09 15:09:52 +04:00
if ( atom - > neg ) {
2005-07-19 18:33:55 +04:00
ret = ! ret ;
2005-08-09 15:09:52 +04:00
if ( ! compound )
ret = 0 ;
}
2002-04-20 21:38:48 +04:00
if ( ( ret = = 1 ) & & ( trans - > counter > = 0 ) ) {
xmlRegCounterPtr counter ;
int count ;
count = exec - > counts [ trans - > counter ] ;
counter = & exec - > comp - > counters [ trans - > counter ] ;
if ( count > = counter - > max )
ret = 0 ;
}
2002-04-16 19:50:10 +04:00
if ( ( ret = = 1 ) & & ( atom - > min > 0 ) & & ( atom - > max > 0 ) ) {
xmlRegStatePtr to = exec - > comp - > states [ trans - > to ] ;
/*
* this is a multiple input sequence
*/
if ( exec - > state - > nbTrans > exec - > transno + 1 ) {
if ( exec - > inputStackNr < = 0 ) {
xmlFARegExecSaveInputString ( exec , value , data ) ;
}
xmlFARegExecSave ( exec ) ;
}
exec - > transcount = 1 ;
do {
/*
* Try to progress as much as possible on the input
*/
if ( exec - > transcount = = atom - > max ) {
break ;
}
exec - > index + + ;
value = exec - > inputStack [ exec - > index ] . value ;
data = exec - > inputStack [ exec - > index ] . data ;
/*
* End of input : stop here
*/
if ( value = = NULL ) {
exec - > index - - ;
break ;
}
if ( exec - > transcount > = atom - > min ) {
int transno = exec - > transno ;
xmlRegStatePtr state = exec - > state ;
/*
* The transition is acceptable save it
*/
exec - > transno = - 1 ; /* trick */
exec - > state = to ;
if ( exec - > inputStackNr < = 0 ) {
xmlFARegExecSaveInputString ( exec , value , data ) ;
}
xmlFARegExecSave ( exec ) ;
exec - > transno = transno ;
exec - > state = state ;
}
ret = xmlStrEqual ( value , atom - > valuep ) ;
exec - > transcount + + ;
} while ( ret = = 1 ) ;
if ( exec - > transcount < atom - > min )
ret = 0 ;
/*
* If the last check failed but one transition was found
* possible , rollback
*/
if ( ret < 0 )
ret = 0 ;
if ( ret = = 0 ) {
goto rollback ;
}
}
}
if ( ret = = 1 ) {
2003-12-26 09:03:14 +03:00
if ( ( exec - > callback ! = NULL ) & & ( atom ! = NULL ) & &
( data ! = NULL ) ) {
2002-04-16 19:50:10 +04:00
exec - > callback ( exec - > data , atom - > valuep ,
atom - > data , data ) ;
}
if ( exec - > state - > nbTrans > exec - > transno + 1 ) {
if ( exec - > inputStackNr < = 0 ) {
xmlFARegExecSaveInputString ( exec , value , data ) ;
}
xmlFARegExecSave ( exec ) ;
}
if ( trans - > counter > = 0 ) {
exec - > counts [ trans - > counter ] + + ;
}
2005-08-08 17:05:13 +04:00
if ( ( trans - > count > = 0 ) & &
( trans - > count < REGEXP_ALL_COUNTER ) ) {
exec - > counts [ trans - > count ] = 0 ;
}
2005-01-12 16:21:17 +03:00
if ( ( exec - > comp - > states [ trans - > to ] ! = NULL ) & &
( exec - > comp - > states [ trans - > to ] - > type = =
XML_REGEXP_SINK_STATE ) ) {
/*
* entering a sink state , save the current state as error
* state .
*/
2024-05-05 21:20:22 +03:00
if ( xmlRegExecSetErrString ( exec , value ) < 0 )
break ;
2005-01-12 16:21:17 +03:00
exec - > errState = exec - > state ;
memcpy ( exec - > errCounts , exec - > counts ,
exec - > comp - > nbCounters * sizeof ( int ) ) ;
}
2002-04-16 19:50:10 +04:00
exec - > state = exec - > comp - > states [ trans - > to ] ;
exec - > transno = 0 ;
if ( trans - > atom ! = NULL ) {
if ( exec - > inputStack ! = NULL ) {
exec - > index + + ;
if ( exec - > index < exec - > inputStackNr ) {
value = exec - > inputStack [ exec - > index ] . value ;
data = exec - > inputStack [ exec - > index ] . data ;
} else {
value = NULL ;
data = NULL ;
}
} else {
value = NULL ;
data = NULL ;
}
}
goto progress ;
} else if ( ret < 0 ) {
2023-09-22 16:25:40 +03:00
exec - > status = XML_REGEXP_INTERNAL_ERROR ;
2002-04-16 19:50:10 +04:00
break ;
}
}
if ( ( exec - > transno ! = 0 ) | | ( exec - > state - > nbTrans = = 0 ) ) {
rollback :
2005-01-09 01:05:09 +03:00
/*
2005-01-12 16:21:17 +03:00
* if we didn ' t yet rollback on the current input
* store the current state as the error state .
2005-01-09 01:05:09 +03:00
*/
2005-01-12 16:21:17 +03:00
if ( ( progress ) & & ( exec - > state ! = NULL ) & &
( exec - > state - > type ! = XML_REGEXP_SINK_STATE ) ) {
2005-01-09 01:05:09 +03:00
progress = 0 ;
2024-05-05 21:20:22 +03:00
if ( xmlRegExecSetErrString ( exec , value ) < 0 )
break ;
2005-01-09 01:05:09 +03:00
exec - > errState = exec - > state ;
2017-05-31 17:48:27 +03:00
if ( exec - > comp - > nbCounters )
memcpy ( exec - > errCounts , exec - > counts ,
exec - > comp - > nbCounters * sizeof ( int ) ) ;
2005-01-09 01:05:09 +03:00
}
2002-04-16 19:50:10 +04:00
/*
* Failed to find a way out
*/
exec - > determinist = 0 ;
xmlFARegExecRollBack ( exec ) ;
2023-09-22 16:25:40 +03:00
if ( ( exec - > inputStack ! = NULL ) & &
( exec - > status = = XML_REGEXP_OK ) ) {
2002-04-16 19:50:10 +04:00
value = exec - > inputStack [ exec - > index ] . value ;
data = exec - > inputStack [ exec - > index ] . data ;
}
}
2005-01-09 01:05:09 +03:00
continue ;
2002-04-16 19:50:10 +04:00
progress :
2005-01-09 01:05:09 +03:00
progress = 1 ;
2002-04-16 19:50:10 +04:00
continue ;
}
2023-09-22 16:25:40 +03:00
if ( exec - > status = = XML_REGEXP_OK ) {
2002-04-16 19:50:10 +04:00
return ( exec - > state - > type = = XML_REGEXP_FINAL_STATE ) ;
}
return ( exec - > status ) ;
}
2005-08-09 15:09:52 +04:00
/**
 * xmlRegExecPushString:
 * @exec: a regexp execution context or NULL to indicate the end
 * @value: a string token input
 * @data: data associated to the token to reuse in callbacks
 *
 * Push one input token in the execution context. This is a plain
 * forward to xmlRegExecPushStringInternal() with its last argument
 * set to 0 (xmlRegExecPushString2() passes 1 there for joined tokens).
 *
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 *     a negative value in case of error.
 */
int
xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
                     void *data) {
    int result;

    result = xmlRegExecPushStringInternal(exec, value, data, 0);
    return(result);
}
2003-04-13 23:53:42 +04:00
/**
 * xmlRegExecPushString2:
 * @exec: a regexp execution context or NULL to indicate the end
 * @value: the first string token input
 * @value2: the second string token input
 * @data: data associated to the token to reuse in callbacks
 *
 * Push one input token in the execution context. When @value2 is not
 * NULL the two strings are joined with XML_REG_STRING_SEPARATOR into a
 * single token before being pushed; when @value2 is NULL this behaves
 * like xmlRegExecPushString(@exec, @value, @data).
 *
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 *     a negative value in case of error (including a NULL @value
 *     combined with a non-NULL @value2, or an allocation failure).
 */
int
xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
                      const xmlChar *value2, void *data) {
    xmlChar buf[150];
    xmlChar *str;
    size_t lenn, lenp, size;
    int ret;

    if (exec == NULL)
        return(-1);
    if (exec->comp == NULL)
        return(-1);
    if (exec->status != XML_REGEXP_OK)
        return(exec->status);

    if (value2 == NULL)
        return(xmlRegExecPushString(exec, value, data));

    /* A joined token needs both halves; strlen(NULL) is undefined. */
    if (value == NULL)
        return(-1);

    lenn = strlen((const char *) value2);
    lenp = strlen((const char *) value);

    /*
     * Room needed: both parts, the separator and the terminating 0.
     * Lengths are kept as size_t so the sum can neither truncate nor
     * wrap silently.
     */
    if ((lenp > SIZE_MAX - 2) || (lenn > SIZE_MAX - 2 - lenp)) {
        exec->status = XML_REGEXP_OUT_OF_MEMORY;
        return(-1);
    }
    size = lenn + lenp + 2;

    if (size > sizeof(buf)) {
        /* too large for the on-stack buffer */
        str = (xmlChar *) xmlMallocAtomic(size);
        if (str == NULL) {
            exec->status = XML_REGEXP_OUT_OF_MEMORY;
            return(-1);
        }
    } else {
        str = buf;
    }

    memcpy(&str[0], value, lenp);
    str[lenp] = XML_REG_STRING_SEPARATOR;
    memcpy(&str[lenp + 1], value2, lenn);
    str[lenn + lenp + 1] = 0;

    if (exec->comp->compact != NULL)
        ret = xmlRegCompactPushString(exec, exec->comp, str, data);
    else
        ret = xmlRegExecPushStringInternal(exec, str, data, 1);

    if (str != buf)
        xmlFree(str);
    return(ret);
}
2005-01-07 16:56:19 +03:00
/**
2005-07-19 20:26:18 +04:00
* xmlRegExecGetValues :
2005-01-09 20:48:02 +03:00
* @ exec : a regexp execution context
* @ err : error extraction or normal one
2005-01-07 16:56:19 +03:00
* @ nbval : pointer to the number of accepted values IN / OUT
2005-01-12 16:21:17 +03:00
* @ nbneg : return number of negative transitions
2005-01-07 16:56:19 +03:00
* @ values : pointer to the array of acceptable values
2005-01-09 20:48:02 +03:00
* @ terminal : return value if this was a terminal state
2005-01-07 16:56:19 +03:00
*
2020-03-08 19:19:42 +03:00
* Extract information from the regexp execution , internal routine to
2005-01-09 20:48:02 +03:00
* implement xmlRegExecNextValues ( ) and xmlRegExecErrInfo ( )
2005-01-07 16:56:19 +03:00
*
* Returns : 0 in case of success or - 1 in case of error .
*/
2005-01-09 20:48:02 +03:00
static int
xmlRegExecGetValues ( xmlRegExecCtxtPtr exec , int err ,
2005-01-12 16:21:17 +03:00
int * nbval , int * nbneg ,
xmlChar * * values , int * terminal ) {
2005-01-07 16:56:19 +03:00
int maxval ;
2005-01-12 16:21:17 +03:00
int nb = 0 ;
2005-01-07 16:56:19 +03:00
2012-09-11 09:26:36 +04:00
if ( ( exec = = NULL ) | | ( nbval = = NULL ) | | ( nbneg = = NULL ) | |
2005-01-12 16:21:17 +03:00
( values = = NULL ) | | ( * nbval < = 0 ) )
2005-01-07 16:56:19 +03:00
return ( - 1 ) ;
2005-01-09 20:48:02 +03:00
2005-01-07 16:56:19 +03:00
maxval = * nbval ;
* nbval = 0 ;
2005-01-12 16:21:17 +03:00
* nbneg = 0 ;
2005-01-07 16:56:19 +03:00
if ( ( exec - > comp ! = NULL ) & & ( exec - > comp - > compact ! = NULL ) ) {
xmlRegexpPtr comp ;
int target , i , state ;
comp = exec - > comp ;
2005-01-09 20:48:02 +03:00
if ( err ) {
if ( exec - > errStateNo = = - 1 ) return ( - 1 ) ;
state = exec - > errStateNo ;
} else {
state = exec - > index ;
}
if ( terminal ! = NULL ) {
if ( comp - > compact [ state * ( comp - > nbstrings + 1 ) ] = =
XML_REGEXP_FINAL_STATE )
* terminal = 1 ;
else
* terminal = 0 ;
}
2005-01-12 16:21:17 +03:00
for ( i = 0 ; ( i < comp - > nbstrings ) & & ( nb < maxval ) ; i + + ) {
2005-01-07 16:56:19 +03:00
target = comp - > compact [ state * ( comp - > nbstrings + 1 ) + i + 1 ] ;
2005-01-12 16:21:17 +03:00
if ( ( target > 0 ) & & ( target < = comp - > nbstates ) & &
( comp - > compact [ ( target - 1 ) * ( comp - > nbstrings + 1 ) ] ! =
XML_REGEXP_SINK_STATE ) ) {
values [ nb + + ] = comp - > stringMap [ i ] ;
2005-01-07 16:56:19 +03:00
( * nbval ) + + ;
}
}
2005-01-12 16:21:17 +03:00
for ( i = 0 ; ( i < comp - > nbstrings ) & & ( nb < maxval ) ; i + + ) {
target = comp - > compact [ state * ( comp - > nbstrings + 1 ) + i + 1 ] ;
if ( ( target > 0 ) & & ( target < = comp - > nbstates ) & &
( comp - > compact [ ( target - 1 ) * ( comp - > nbstrings + 1 ) ] = =
XML_REGEXP_SINK_STATE ) ) {
values [ nb + + ] = comp - > stringMap [ i ] ;
( * nbneg ) + + ;
}
}
2005-01-07 16:56:19 +03:00
} else {
int transno ;
xmlRegTransPtr trans ;
xmlRegAtomPtr atom ;
2005-01-09 20:48:02 +03:00
xmlRegStatePtr state ;
if ( terminal ! = NULL ) {
if ( exec - > state - > type = = XML_REGEXP_FINAL_STATE )
* terminal = 1 ;
else
* terminal = 0 ;
}
2005-01-07 16:56:19 +03:00
2005-01-09 20:48:02 +03:00
if ( err ) {
if ( exec - > errState = = NULL ) return ( - 1 ) ;
state = exec - > errState ;
} else {
if ( exec - > state = = NULL ) return ( - 1 ) ;
state = exec - > state ;
}
2005-01-07 16:56:19 +03:00
for ( transno = 0 ;
2005-01-12 16:21:17 +03:00
( transno < state - > nbTrans ) & & ( nb < maxval ) ;
2005-01-07 16:56:19 +03:00
transno + + ) {
2005-01-09 20:48:02 +03:00
trans = & state - > trans [ transno ] ;
2005-01-07 16:56:19 +03:00
if ( trans - > to < 0 )
continue ;
atom = trans - > atom ;
if ( ( atom = = NULL ) | | ( atom - > valuep = = NULL ) )
continue ;
if ( trans - > count = = REGEXP_ALL_LAX_COUNTER ) {
2005-01-12 16:21:17 +03:00
/* this should not be reached but ... */
2005-01-07 16:56:19 +03:00
} else if ( trans - > count = = REGEXP_ALL_COUNTER ) {
2005-01-12 16:21:17 +03:00
/* this should not be reached but ... */
2005-01-07 16:56:19 +03:00
} else if ( trans - > counter > = 0 ) {
2006-03-10 03:36:23 +03:00
xmlRegCounterPtr counter = NULL ;
2005-01-07 16:56:19 +03:00
int count ;
2005-01-09 20:48:02 +03:00
if ( err )
count = exec - > errCounts [ trans - > counter ] ;
else
count = exec - > counts [ trans - > counter ] ;
2006-03-10 03:36:23 +03:00
if ( exec - > comp ! = NULL )
counter = & exec - > comp - > counters [ trans - > counter ] ;
if ( ( counter = = NULL ) | | ( count < counter - > max ) ) {
2005-07-19 20:26:18 +04:00
if ( atom - > neg )
values [ nb + + ] = ( xmlChar * ) atom - > valuep2 ;
else
values [ nb + + ] = ( xmlChar * ) atom - > valuep ;
2005-01-07 16:56:19 +03:00
( * nbval ) + + ;
}
} else {
2013-09-11 10:59:06 +04:00
if ( ( exec - > comp ! = NULL ) & & ( exec - > comp - > states [ trans - > to ] ! = NULL ) & &
2005-01-12 16:21:17 +03:00
( exec - > comp - > states [ trans - > to ] - > type ! =
XML_REGEXP_SINK_STATE ) ) {
2005-07-19 20:26:18 +04:00
if ( atom - > neg )
values [ nb + + ] = ( xmlChar * ) atom - > valuep2 ;
else
values [ nb + + ] = ( xmlChar * ) atom - > valuep ;
2005-01-12 16:21:17 +03:00
( * nbval ) + + ;
}
2012-09-11 09:26:36 +04:00
}
2005-01-12 16:21:17 +03:00
}
for ( transno = 0 ;
( transno < state - > nbTrans ) & & ( nb < maxval ) ;
transno + + ) {
trans = & state - > trans [ transno ] ;
if ( trans - > to < 0 )
continue ;
atom = trans - > atom ;
if ( ( atom = = NULL ) | | ( atom - > valuep = = NULL ) )
continue ;
if ( trans - > count = = REGEXP_ALL_LAX_COUNTER ) {
continue ;
} else if ( trans - > count = = REGEXP_ALL_COUNTER ) {
continue ;
} else if ( trans - > counter > = 0 ) {
continue ;
} else {
if ( ( exec - > comp - > states [ trans - > to ] ! = NULL ) & &
( exec - > comp - > states [ trans - > to ] - > type = =
XML_REGEXP_SINK_STATE ) ) {
2005-07-19 20:26:18 +04:00
if ( atom - > neg )
values [ nb + + ] = ( xmlChar * ) atom - > valuep2 ;
else
values [ nb + + ] = ( xmlChar * ) atom - > valuep ;
2005-01-12 16:21:17 +03:00
( * nbneg ) + + ;
}
2012-09-11 09:26:36 +04:00
}
2005-01-07 16:56:19 +03:00
}
}
return ( 0 ) ;
}
2005-01-09 20:48:02 +03:00
/**
* xmlRegExecNextValues :
* @ exec : a regexp execution context
* @ nbval : pointer to the number of accepted values IN / OUT
2005-01-12 16:21:17 +03:00
* @ nbneg : return number of negative transitions
2005-01-09 20:48:02 +03:00
* @ values : pointer to the array of acceptable values
* @ terminal : return value if this was a terminal state
*
2020-03-08 19:19:42 +03:00
* Extract information from the regexp execution ,
2005-01-09 20:48:02 +03:00
* the parameter @ values must point to an array of @ nbval string pointers
* on return nbval will contain the number of possible strings in that
* state and the @ values array will be updated with them . The string values
* returned will be freed with the @ exec context and don ' t need to be
* deallocated .
*
* Returns : 0 in case of success or - 1 in case of error .
*/
int
2005-01-12 16:21:17 +03:00
xmlRegExecNextValues ( xmlRegExecCtxtPtr exec , int * nbval , int * nbneg ,
xmlChar * * values , int * terminal ) {
return ( xmlRegExecGetValues ( exec , 0 , nbval , nbneg , values , terminal ) ) ;
2005-01-09 20:48:02 +03:00
}
/**
 * xmlRegExecErrInfo:
 * @exec: a regexp execution context generating an error
 * @string: return value for the error string
 * @nbval: pointer to the number of accepted values IN/OUT
 * @nbneg: return number of negative transitions
 * @values: pointer to the array of acceptable values
 * @terminal: return value if this was a terminal state
 *
 * Extract error information from the regexp execution, the parameter
 * @string will be updated with the value pushed and not accepted,
 * the parameter @values must point to an array of @nbval string pointers
 * on return nbval will contain the number of possible strings in that
 * state and the @values array will be updated with them. The string values
 * returned will be freed with the @exec context and don't need to be
 * deallocated.
 *
 * @string is set to NULL when the context status is XML_REGEXP_OK.
 *
 * Returns: 0 in case of success or -1 in case of error.
 */
int
xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
                  int *nbval, int *nbneg, xmlChar **values, int *terminal) {
    if (exec == NULL)
        return(-1);

    if (string != NULL)
        *string = (exec->status != XML_REGEXP_OK) ? exec->errString : NULL;

    /* err == 1: report on the recorded error state */
    return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
}
2002-04-16 19:50:10 +04:00
#if 0
/*
 * xmlRegExecPushChar:
 * @exec: a regexp execution context
 * @UCS: a character value (not referenced anywhere in the body below)
 *
 * Disabled code: everything up to the matching #endif is excluded from
 * compilation by the "#if 0" above.
 *
 * The loop walks the transitions of exec->state against the characters
 * found in exec->inputString starting at exec->index, saving alternatives
 * with xmlFARegExecSave() and backtracking with xmlFARegExecRollBack().
 *
 * NOTE(review): the function is declared to return int but there is no
 * return statement after the main loop; this must be fixed before the
 * block is ever re-enabled.
 */
static int
xmlRegExecPushChar ( xmlRegExecCtxtPtr exec , int UCS ) {
xmlRegTransPtr trans ;
xmlRegAtomPtr atom ;
int ret ;
int codepoint , len ;
if ( exec = = NULL )
return ( - 1 ) ;
2023-09-22 16:25:40 +03:00
if ( exec - > status ! = XML_REGEXP_OK )
2002-04-16 19:50:10 +04:00
return ( exec - > status ) ;
2023-09-22 16:25:40 +03:00
while ( ( exec - > status = = XML_REGEXP_OK ) & &
2002-04-16 19:50:10 +04:00
( ( exec - > inputString [ exec - > index ] ! = 0 ) | |
( exec - > state - > type ! = XML_REGEXP_FINAL_STATE ) ) ) {
/*
 * End of input on non-terminal state, rollback, however we may
 * still have epsilon like transition for counted transitions
 * on counters, in that case don't break too early.
 */
if ( ( exec - > inputString [ exec - > index ] = = 0 ) & & ( exec - > counts = = NULL ) )
goto rollback ;
exec - > transcount = 0 ;
/* Scan the remaining transitions of the current state. */
for ( ; exec - > transno < exec - > state - > nbTrans ; exec - > transno + + ) {
trans = & exec - > state - > trans [ exec - > transno ] ;
if ( trans - > to < 0 )
continue ;
atom = trans - > atom ;
ret = 0 ;
if ( trans - > count > = 0 ) {
int count ;
xmlRegCounterPtr counter ;
/*
 * A counted transition: it can be taken when the counter value
 * lies within [min, max].
 */
count = exec - > counts [ trans - > count ] ;
counter = & exec - > comp - > counters [ trans - > count ] ;
ret = ( ( count > = counter - > min ) & & ( count < = counter - > max ) ) ;
} else if ( atom = = NULL ) {
fprintf ( stderr , " epsilon transition left at runtime \n " ) ;
2023-09-22 16:25:40 +03:00
exec - > status = XML_REGEXP_INTERNAL_ERROR ;
2002-04-16 19:50:10 +04:00
break ;
} else if ( exec - > inputString [ exec - > index ] ! = 0 ) {
codepoint = CUR_SCHAR ( & ( exec - > inputString [ exec - > index ] ) , len ) ;
ret = xmlRegCheckCharacter ( atom , codepoint ) ;
if ( ( ret = = 1 ) & & ( atom - > min > 0 ) & & ( atom - > max > 0 ) ) {
xmlRegStatePtr to = exec - > comp - > states [ trans - > to ] ;
/*
 * this is a multiple input sequence
 */
if ( exec - > state - > nbTrans > exec - > transno + 1 ) {
xmlFARegExecSave ( exec ) ;
}
exec - > transcount = 1 ;
do {
/*
 * Try to progress as much as possible on the input
 */
if ( exec - > transcount = = atom - > max ) {
break ;
}
exec - > index + = len ;
/*
 * End of input: stop here
 */
if ( exec - > inputString [ exec - > index ] = = 0 ) {
exec - > index - = len ;
break ;
}
if ( exec - > transcount > = atom - > min ) {
int transno = exec - > transno ;
xmlRegStatePtr state = exec - > state ;
/*
 * The transition is acceptable, save it: transno and state are
 * swapped temporarily so that the saved alternative describes
 * the target state, then restored.
 */
exec - > transno = - 1 ; /* trick */
exec - > state = to ;
xmlFARegExecSave ( exec ) ;
exec - > transno = transno ;
exec - > state = state ;
}
codepoint = CUR_SCHAR ( & ( exec - > inputString [ exec - > index ] ) ,
len ) ;
ret = xmlRegCheckCharacter ( atom , codepoint ) ;
exec - > transcount + + ;
} while ( ret = = 1 ) ;
if ( exec - > transcount < atom - > min )
ret = 0 ;
/*
 * If the last check failed but one transition was found
 * possible, rollback
 */
if ( ret < 0 )
ret = 0 ;
if ( ret = = 0 ) {
goto rollback ;
}
}
}
if ( ret = = 1 ) {
if ( exec - > state - > nbTrans > exec - > transno + 1 ) {
xmlFARegExecSave ( exec ) ;
}
2006-03-22 02:17:57 +03:00
/*
 * restart count for expressions like this ((abc){2})*
 */
if ( trans - > count > = 0 ) {
exec - > counts [ trans - > count ] = 0 ;
}
2002-04-16 19:50:10 +04:00
if ( trans - > counter > = 0 ) {
exec - > counts [ trans - > counter ] + + ;
}
exec - > state = exec - > comp - > states [ trans - > to ] ;
exec - > transno = 0 ;
if ( trans - > atom ! = NULL ) {
exec - > index + = len ;
}
goto progress ;
} else if ( ret < 0 ) {
2023-09-22 16:25:40 +03:00
exec - > status = XML_REGEXP_INTERNAL_ERROR ;
2002-04-16 19:50:10 +04:00
break ;
}
}
if ( ( exec - > transno ! = 0 ) | | ( exec - > state - > nbTrans = = 0 ) ) {
rollback :
/*
 * Failed to find a way out
 */
exec - > determinist = 0 ;
xmlFARegExecRollBack ( exec ) ;
}
progress :
continue ;
}
/* NOTE(review): control reaches the end of a non-void function here. */
}
# endif
/************************************************************************
2012-09-11 09:26:36 +04:00
* *
2004-05-06 08:17:26 +04:00
* Parser for the Schemas Datatype Regular Expressions *
2002-04-16 19:50:10 +04:00
* http : //www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs *
2012-09-11 09:26:36 +04:00
* *
2002-04-16 19:50:10 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * xmlFAIsChar:
 * @ctxt: a regexp parser context
 *
 * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
 *
 * Decodes the UTF-8 character at the current parsing position without
 * consuming it.
 *
 * Returns the code point when it is an ordinary character, -1 when it is
 *     one of the excluded metacharacters or the end of the string, and 0
 *     after raising a compile error for invalid UTF-8.
 */
static int
xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
    int codepoint;
    int size = 4;

    codepoint = xmlGetUTF8Char(ctxt->cur, &size);
    if (codepoint < 0) {
        ERROR("Invalid UTF-8");
        return(0);
    }

    switch (codepoint) {
        case 0:
        case '.':
        case '\\':
        case '?':
        case '*':
        case '+':
        case '(':
        case ')':
        case '|':
        case 0x5B:
        case 0x5D:
            return(-1);
        default:
            break;
    }
    return(codepoint);
}
/**
 * xmlFAParseCharProp:
 * @ctxt: a regexp parser context
 *
 * [27]   charProp   ::=   IsCategory | IsBlock
 * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
 *                       Separators | Symbols | Others
 * [29]   Letters   ::=   'L' [ultmo]?
 * [30]   Marks   ::=   'M' [nce]?
 * [31]   Numbers   ::=   'N' [dlo]?
 * [32]   Punctuation   ::=   'P' [cdseifo]?
 * [33]   Separators   ::=   'Z' [slp]?
 * [34]   Symbols   ::=   'S' [mcko]?
 * [35]   Others   ::=   'C' [cfon]?
 * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
 *
 * Parses the property at the current position and either creates
 * ctxt->atom with the matching type (when no atom is being built) or
 * adds a range of that type to the current XML_REGEXP_RANGES atom.
 * For IsBlock the duplicated block name is handed over to the atom or
 * range; it is released here whenever nothing takes ownership of it.
 */
static void
xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
    /*
     * For each category letter the entry with minor == 0 (the whole
     * category) comes first, immediately followed by its subcategories.
     */
    static const struct {
        char major;
        char minor;
        xmlRegAtomType type;
    } categories[] = {
        { 'L', 0,   XML_REGEXP_LETTER },
        { 'L', 'u', XML_REGEXP_LETTER_UPPERCASE },
        { 'L', 'l', XML_REGEXP_LETTER_LOWERCASE },
        { 'L', 't', XML_REGEXP_LETTER_TITLECASE },
        { 'L', 'm', XML_REGEXP_LETTER_MODIFIER },
        { 'L', 'o', XML_REGEXP_LETTER_OTHERS },
        { 'M', 0,   XML_REGEXP_MARK },
        { 'M', 'n', XML_REGEXP_MARK_NONSPACING },
        { 'M', 'c', XML_REGEXP_MARK_SPACECOMBINING },
        { 'M', 'e', XML_REGEXP_MARK_ENCLOSING },
        { 'N', 0,   XML_REGEXP_NUMBER },
        { 'N', 'd', XML_REGEXP_NUMBER_DECIMAL },
        { 'N', 'l', XML_REGEXP_NUMBER_LETTER },
        { 'N', 'o', XML_REGEXP_NUMBER_OTHERS },
        { 'P', 0,   XML_REGEXP_PUNCT },
        { 'P', 'c', XML_REGEXP_PUNCT_CONNECTOR },
        { 'P', 'd', XML_REGEXP_PUNCT_DASH },
        { 'P', 's', XML_REGEXP_PUNCT_OPEN },
        { 'P', 'e', XML_REGEXP_PUNCT_CLOSE },
        { 'P', 'i', XML_REGEXP_PUNCT_INITQUOTE },
        { 'P', 'f', XML_REGEXP_PUNCT_FINQUOTE },
        { 'P', 'o', XML_REGEXP_PUNCT_OTHERS },
        { 'Z', 0,   XML_REGEXP_SEPAR },
        { 'Z', 's', XML_REGEXP_SEPAR_SPACE },
        { 'Z', 'l', XML_REGEXP_SEPAR_LINE },
        { 'Z', 'p', XML_REGEXP_SEPAR_PARA },
        { 'S', 0,   XML_REGEXP_SYMBOL },
        { 'S', 'm', XML_REGEXP_SYMBOL_MATH },
        { 'S', 'c', XML_REGEXP_SYMBOL_CURRENCY },
        { 'S', 'k', XML_REGEXP_SYMBOL_MODIFIER },
        { 'S', 'o', XML_REGEXP_SYMBOL_OTHERS },
        { 'C', 0,   XML_REGEXP_OTHER },
        { 'C', 'c', XML_REGEXP_OTHER_CONTROL },
        { 'C', 'f', XML_REGEXP_OTHER_FORMAT },
        { 'C', 'o', XML_REGEXP_OTHER_PRIVATE },
        { 'C', 'n', XML_REGEXP_OTHER_NA }
    };
    const int nbCategories =
        (int) (sizeof(categories) / sizeof(categories[0]));
    int cur;
    xmlRegAtomType type = (xmlRegAtomType) 0;
    xmlChar *blockName = NULL;

    cur = CUR;
    if (cur == 'I') {
        const xmlChar *start;

        NEXT;
        cur = CUR;
        if (cur != 's') {
            ERROR("IsXXXX expected");
            return;
        }
        NEXT;
        start = ctxt->cur;
        cur = CUR;
        /* consume the block name: [a-zA-Z0-9#x2D]* */
        while (((cur >= 'a') && (cur <= 'z')) ||
               ((cur >= 'A') && (cur <= 'Z')) ||
               ((cur >= '0') && (cur <= '9')) ||
               (cur == 0x2D)) {
            NEXT;
            cur = CUR;
        }
        type = XML_REGEXP_BLOCK_NAME;
        blockName = xmlStrndup(start, ctxt->cur - start);
        if (blockName == NULL)
            xmlRegexpErrMemory(ctxt);
    } else {
        int i;
        int major = -1;

        /* locate the whole-category entry for this letter */
        for (i = 0; i < nbCategories; i++) {
            if ((categories[i].minor == 0) &&
                (categories[i].major == cur)) {
                major = i;
                break;
            }
        }
        if (major < 0) {
            ERROR("Unknown char property");
            return;
        }
        type = categories[major].type;
        NEXT;
        cur = CUR;
        /*
         * Optional subcategory letter: it is consumed only when it is
         * one of the letters listed for this category.
         */
        for (i = major + 1;
             (i < nbCategories) && (categories[i].minor != 0);
             i++) {
            if (categories[i].minor == cur) {
                type = categories[i].type;
                NEXT;
                break;
            }
        }
    }

    if (ctxt->atom == NULL) {
        ctxt->atom = xmlRegNewAtom(ctxt, type);
        if (ctxt->atom == NULL) {
            xmlFree(blockName);
            return;
        }
        ctxt->atom->valuep = blockName;
    } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
        if (xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
                               type, 0, 0, blockName) == NULL) {
            xmlFree(blockName);
        }
    } else {
        /* nothing takes ownership of the block name: do not leak it */
        xmlFree(blockName);
    }
}
2021-05-31 08:45:18 +03:00
/*
 * Reads the four hexadecimal digits that follow the current position
 * (the cursor is advanced before each digit is read, so on success it is
 * left on the last digit).
 *
 * Returns the 16-bit value, or -1 after raising a compile error when a
 * non-hexadecimal character is met.
 */
static int parse_escaped_codeunit(xmlRegParserCtxtPtr ctxt)
{
    int value = 0;
    int remaining;

    for (remaining = 4; remaining > 0; remaining--) {
        int digit;
        int c;

        NEXT;
        c = CUR;
        if ((c >= '0') && (c <= '9')) {
            digit = c - '0';
        } else if ((c >= 'A') && (c <= 'F')) {
            digit = c - 'A' + 10;
        } else if ((c >= 'a') && (c <= 'f')) {
            digit = c - 'a' + 10;
        } else {
            ERROR("Expecting hex digit");
            return -1;
        }
        value = value * 16 + digit;
    }
    return value;
}
/*
 * Reads one escaped code unit via parse_escaped_codeunit(); when it is a
 * high surrogate (0xD800-0xDBFF) a following "\uXXXX" low surrogate
 * (0xDC00-0xDFFF) is required and both are combined into one code point.
 *
 * Returns the code point, or -1 on error (a compile error is raised).
 */
static int parse_escaped_codepoint(xmlRegParserCtxtPtr ctxt)
{
    int high = parse_escaped_codeunit(ctxt);
    int low;

    /* anything but a high surrogate (errors included) is returned as is */
    if ((high < 0xD800) || (high > 0xDBFF))
        return high;

    NEXT;
    if (CUR == '\\') {
        NEXT;
        if (CUR == 'u') {
            low = parse_escaped_codeunit(ctxt);
            if ((low >= 0xDC00) && (low <= 0xDFFF))
                return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00);
        }
    }

    ERROR("Invalid low surrogate pair code unit");
    return -1;
}
2002-04-16 19:50:10 +04:00
/*
 * Maps the letter following a backslash in a single character escape to
 * the code point it denotes. For 'u' the four (or, for a surrogate pair,
 * eight plus "\u") hex digits are consumed through
 * parse_escaped_codepoint().
 *
 * Returns the code point, or -1 when the \u sequence is malformed (the
 * compile error has already been raised).
 */
static int
xmlFASingleCharEscValue(xmlRegParserCtxtPtr ctxt, int esc) {
    switch (esc) {
        case 'n':
            return('\n');
        case 'r':
            return('\r');
        case 't':
            return('\t');
        case 'u':
            return(parse_escaped_codepoint(ctxt));
        default:
            break;
    }
    return(esc);
}

/**
 * xmlFAParseCharClassEsc:
 * @ctxt: a regexp parser context
 *
 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
 * [25] catEsc   ::=   '\p{' charProp '}'
 * [26] complEsc ::=   '\P{' charProp '}'
 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
 *
 * The parsed escape either becomes a new ctxt->atom (when no atom is
 * being built) or is added as a range to the current XML_REGEXP_RANGES
 * atom. A few non-standard single character escapes, including \uXXXX,
 * are accepted as well; \uXXXX is decoded the same way in both cases.
 */
static void
xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
    int cur;

    if (CUR == '.') {
        if (ctxt->atom == NULL) {
            ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
        } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
            xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
                               XML_REGEXP_ANYCHAR, 0, 0, NULL);
        }
        NEXT;
        return;
    }
    if (CUR != '\\') {
        ERROR("Escaped sequence: expecting \\");
        return;
    }
    NEXT;
    cur = CUR;
    if ((cur == 'p') || (cur == 'P')) {
        NEXT;
        if (CUR != '{') {
            ERROR("Expecting '{'");
            return;
        }
        NEXT;
        xmlFAParseCharProp(ctxt);
        /*
         * \P{...} is the complement of \p{...}.
         * NOTE(review): when the current atom is a RANGES atom this
         * flags the whole atom, not only the range just added -
         * confirm this is intended.
         */
        if ((cur == 'P') && (ctxt->atom != NULL))
            ctxt->atom->neg = 1;
        if (CUR != '}') {
            ERROR("Expecting '}'");
            return;
        }
        NEXT;
    } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
        (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
        (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
        (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
        (cur == 0x5E) ||

        /* Non-standard escape sequences:
         *                  Java 1.8|.NET Core 3.1|MSXML 6 */
        (cur == '!') ||     /*   +  |     +       |    +   */
        (cur == '"') ||     /*   +  |     +       |    +   */
        (cur == '#') ||     /*   +  |     +       |    +   */
        (cur == '$') ||     /*   +  |     +       |    +   */
        (cur == '%') ||     /*   +  |     +       |    +   */
        (cur == ',') ||     /*   +  |     +       |    +   */
        (cur == '/') ||     /*   +  |     +       |    +   */
        (cur == ':') ||     /*   +  |     +       |    +   */
        (cur == ';') ||     /*   +  |     +       |    +   */
        (cur == '=') ||     /*   +  |     +       |    +   */
        (cur == '>') ||     /*      |     +       |    +   */
        (cur == '@') ||     /*   +  |     +       |    +   */
        (cur == '`') ||     /*   +  |     +       |    +   */
        (cur == '~') ||     /*   +  |     +       |    +   */
        (cur == 'u')) {     /*      |     +       |    +   */
        if (ctxt->atom == NULL) {
            ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
            if (ctxt->atom != NULL) {
                cur = xmlFASingleCharEscValue(ctxt, cur);
                if (cur < 0)
                    return;
                ctxt->atom->codepoint = cur;
            }
        } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
            /*
             * Decode \uXXXX here too, otherwise the escape would be
             * added as the letter 'u' followed by literal hex digits.
             */
            cur = xmlFASingleCharEscValue(ctxt, cur);
            if (cur < 0)
                return;
            xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
                               XML_REGEXP_CHARVAL, cur, cur, NULL);
        }
        NEXT;
    } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
        (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
        (cur == 'w') || (cur == 'W')) {
        xmlRegAtomType type = XML_REGEXP_ANYSPACE;

        switch (cur) {
            case 's':
                type = XML_REGEXP_ANYSPACE;
                break;
            case 'S':
                type = XML_REGEXP_NOTSPACE;
                break;
            case 'i':
                type = XML_REGEXP_INITNAME;
                break;
            case 'I':
                type = XML_REGEXP_NOTINITNAME;
                break;
            case 'c':
                type = XML_REGEXP_NAMECHAR;
                break;
            case 'C':
                type = XML_REGEXP_NOTNAMECHAR;
                break;
            case 'd':
                type = XML_REGEXP_DECIMAL;
                break;
            case 'D':
                type = XML_REGEXP_NOTDECIMAL;
                break;
            case 'w':
                type = XML_REGEXP_REALCHAR;
                break;
            case 'W':
                type = XML_REGEXP_NOTREALCHAR;
                break;
        }
        NEXT;
        if (ctxt->atom == NULL) {
            ctxt->atom = xmlRegNewAtom(ctxt, type);
        } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
            xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
                               type, 0, 0, NULL);
        }
    } else {
        ERROR("Wrong escape sequence, misuse of character '\\'");
    }
}
/**
 * xmlFAParseRangeBound:
 * @ctxt:  a regexp parser context
 * @len:  receives the number of bytes the bound occupies at ctxt->cur
 * @expected:  error message raised when no bound can start here
 *
 * Reads one bound of a character range, either a single character
 * escape or a plain UTF-8 encoded character. For an escape the cursor
 * is moved from the backslash onto the escaped character; for a plain
 * character the cursor is left untouched. In both cases the caller
 * skips the bound with NEXTL(*len).
 *
 * Returns the code point of the bound, or -1 after raising an error
 */
static int
xmlFAParseRangeBound(xmlRegParserCtxtPtr ctxt, int *len,
                     const char *expected) {
    int c = CUR;
    int value;

    if (c == '\\') {
        NEXT;
        c = CUR;
        switch (c) {
            case 'n':
                value = 0xA;
                break;
            case 'r':
                value = 0xD;
                break;
            case 't':
                value = 0x9;
                break;
            case '\\': case '|': case '.': case '-': case '^': case '?':
            case '*': case '+': case '{': case '}': case '(': case ')':
            case '[': case ']':
                /* escaped metacharacter stands for itself */
                value = c;
                break;
            default:
                ERROR(" Invalid escape value ");
                return(-1);
        }
        *len = 1;
        return(value);
    }

    /* end of input, '[' and ']' can never be a bound */
    if ((c == '\0') || (c == 0x5B) || (c == 0x5D)) {
        ERROR(expected);
        return(-1);
    }

    *len = 4;
    value = xmlGetUTF8Char(ctxt->cur, len);
    if (value < 0) {
        ERROR(" Invalid UTF-8 ");
        return(-1);
    }
    return(value);
}

/**
 * xmlFAParseCharRange:
 * @ctxt:  a regexp parser context
 *
 * Parses one character range (or a single character) inside a
 * character group and adds it to ctxt->atom with the current
 * ctxt->neg setting.
 *
 * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
 * [18]   seRange   ::=   charOrEsc '-' charOrEsc
 * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
 * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
 * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
 */
static void
xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
    int first, last;
    int len = 0;

    if (CUR == '\0') {
        ERROR(" Expecting ']' ");
        return;
    }

    first = xmlFAParseRangeBound(ctxt, &len, " Expecting a char range ");
    if (first < 0)
        return;

    /*
     * Since we are "inside" a range, we can assume ctxt->cur is past
     * the start of ctxt->string, and PREV should be safe.
     * A '-' that is neither directly before ']' nor directly after
     * '[' or '^' is consumed without adding anything to the atom.
     */
    if ((first == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
        NEXTL(len);
        return;
    }
    NEXTL(len);

    /* no "-bound" part follows: the first bound is a range of its own */
    if ((CUR != '-') || (NXT(1) == '[') || (NXT(1) == ']')) {
        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
                           XML_REGEXP_CHARVAL, first, first, NULL);
        return;
    }

    /* skip the '-' and read the upper bound */
    NEXT;
    last = xmlFAParseRangeBound(ctxt, &len,
                                " Expecting the end of a char range ");
    if (last < 0)
        return;

    /* TODO check that the values are acceptable character ranges for XML */
    if (last < first) {
        ERROR(" End of range is before start of range ");
        return;
    }
    NEXTL(len);
    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
                       XML_REGEXP_CHARVAL, first, last, NULL);
}
/**
 * xmlFAParsePosCharGroup:
 * @ctxt:  a regexp parser context
 *
 * Parses at least one item of a positive character group, and keeps
 * going until ']', '-', the end of the input or an error is reached.
 *
 * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
 */
static void
xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
    for (;;) {
        if (CUR == '\\')
            xmlFAParseCharClassEsc(ctxt);
        else
            xmlFAParseCharRange(ctxt);

        if ((CUR == ']') || (CUR == '-') ||
            (CUR == 0) || (ctxt->error != 0))
            break;
    }
}
/**
 * xmlFAParseCharGroup:
 * @ctxt:  a regexp parser context
 *
 * Parses the content of a bracket expression, up to but not including
 * the closing ']' of the outermost group. ctxt->neg is restored to its
 * value on entry after a negated part and after a subtraction.
 *
 * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
 * [15]   negCharGroup ::= '^' posCharGroup
 * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
 * [12]   charClassExpr ::= '[' charGroup ']'
 */
static void
xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
    int savedNeg = ctxt->neg;

    if (CUR == '^') {
        NEXT;
        ctxt->neg = !ctxt->neg;
        xmlFAParsePosCharGroup(ctxt);
        ctxt->neg = savedNeg;
    }

    while ((CUR != ']') && (ctxt->error == 0)) {
        if ((CUR != '-') || (NXT(1) != '[')) {
            xmlFAParsePosCharGroup(ctxt);
            continue;
        }

        /* "-[" introduces a subtracted group: eat the '-' and the '[' */
        NEXTL(2);
        ctxt->neg = 2;
        xmlFAParseCharGroup(ctxt);
        ctxt->neg = savedNeg;
        if (CUR == ']') {
            NEXT;
        } else {
            ERROR(" charClassExpr: ']' expected ");
        }
        /* nothing may follow the subtracted group */
        break;
    }
}
/**
 * xmlFAParseCharClass:
 * @ctxt:  a regexp parser context
 *
 * Parses a character class. A '[' opens a bracket expression, for
 * which a new XML_REGEXP_RANGES atom is stored in ctxt->atom; anything
 * else is handed to xmlFAParseCharClassEsc().
 *
 * [11]   charClass   ::=     charClassEsc | charClassExpr
 * [12]   charClassExpr   ::=   '[' charGroup ']'
 */
static void
xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
    if (CUR != '[') {
        xmlFAParseCharClassEsc(ctxt);
        return;
    }

    NEXT;
    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
    if (ctxt->atom == NULL)
        return;

    xmlFAParseCharGroup(ctxt);
    if (CUR != ']') {
        ERROR(" xmlFAParseCharClass: ']' expected ");
        return;
    }
    NEXT;
}
/**
 * xmlFAParseQuantExact:
 * @ctxt:  a regexp parser context
 *
 * Consumes a run of decimal digits at the cursor and converts it to
 * an int. All the digits are consumed even when the value overflows.
 *
 * [8]   QuantExact   ::=   [0-9]+
 *
 * Returns the parsed value, or -1 if there was no digit or if the
 *         value does not fit in an int
 */
static int
xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
    int value = 0;
    int sawDigit = 0;
    int overflowed = 0;

    for (; (CUR >= '0') && (CUR <= '9'); NEXT) {
        int digit = CUR - '0';

        sawDigit = 1;
        if (overflowed)
            continue;
        /* both tests are done before computing, so nothing can wrap */
        if ((value > INT_MAX / 10) || (value * 10 > INT_MAX - digit))
            overflowed = 1;
        else
            value = value * 10 + digit;
    }

    if (!sawDigit || overflowed)
        return(-1);
    return(value);
}
/**
 * xmlFAParseQuantifier:
 * @ctxt:  a regexp parser context
 *
 * Parses an optional quantifier and records it on ctxt->atom when an
 * atom is present. Errors inside a '{...}' quantifier are raised
 * through ERROR() but parsing of the quantifier continues.
 *
 * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
 * [5]   quantity   ::=   quantRange | quantMin | QuantExact
 * [6]   quantRange   ::=   QuantExact ',' QuantExact
 * [7]   quantMin   ::=   QuantExact ','
 * [8]   QuantExact   ::=   [0-9]+
 *
 * Returns 1 if a quantifier was consumed, 0 otherwise
 */
static int
xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
    int lower = 0;
    int upper = 0;
    int value;

    switch (CUR) {
        case '?':
            if (ctxt->atom != NULL)
                ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
            NEXT;
            return(1);
        case '*':
            if (ctxt->atom != NULL)
                ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
            NEXT;
            return(1);
        case '+':
            if (ctxt->atom != NULL)
                ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
            NEXT;
            return(1);
        case '{':
            break;
        default:
            return(0);
    }

    /* skip the '{' and read the lower bound */
    NEXT;
    value = xmlFAParseQuantExact(ctxt);
    if (value >= 0) {
        lower = value;
    } else {
        ERROR(" Improper quantifier ");
    }

    if (CUR == ',') {
        NEXT;
        if (CUR == '}') {
            /* "{n,}": no upper bound */
            upper = INT_MAX;
        } else {
            value = xmlFAParseQuantExact(ctxt);
            if (value >= 0) {
                upper = value;
            } else {
                ERROR(" Improper quantifier ");
            }
        }
    }

    if (CUR == '}') {
        NEXT;
    } else {
        ERROR(" Unterminated quantifier ");
    }

    /*
     * An upper bound still at 0 (none given, or an explicit 0) is
     * replaced by the lower bound.
     */
    if (upper == 0)
        upper = lower;

    if (ctxt->atom != NULL) {
        ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
        ctxt->atom->min = lower;
        ctxt->atom->max = upper;
    }
    return(1);
}
/**
 * xmlFAParseAtom:
 * @ctxt:  a regexp parser context
 *
 * Parses one atom and stores it in ctxt->atom: a plain character, a
 * character class, or a parenthesized sub-expression. Groups may be
 * nested at most 50 levels deep.
 *
 * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
 *
 * Returns 1 if an atom was parsed, 0 if no atom starts at the cursor
 *         and -1 in case of error
 */
static int
xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
    xmlRegStatePtr groupStart0, groupStart, outerEnd;
    int codepoint, len;

    codepoint = xmlFAIsChar(ctxt);
    if (codepoint > 0) {
        /* plain character */
        ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
        if (ctxt->atom == NULL)
            return(-1);
        len = 4;
        codepoint = xmlGetUTF8Char(ctxt->cur, &len);
        if (codepoint < 0) {
            ERROR(" Invalid UTF-8 ");
            return(-1);
        }
        ctxt->atom->codepoint = codepoint;
        NEXTL(len);
        return(1);
    }

    switch (CUR) {
        case '[':
        case '\\':
        case '.':
            xmlFAParseCharClass(ctxt);
            return(1);
        case '(':
            break;
        default:
            /* '|', ')', end of input or anything else: not an atom */
            return(0);
    }

    /* parenthesized sub-expression */
    NEXT;
    if (ctxt->depth >= 50) {
        ERROR(" xmlFAParseAtom: maximum nesting depth exceeded ");
        return(-1);
    }

    /*
     * this extra Epsilon transition is needed if we count with 0 allowed
     * unfortunately this can't be known at that point
     */
    xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
    groupStart0 = ctxt->state;
    xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
    groupStart = ctxt->state;

    outerEnd = ctxt->end;
    ctxt->end = NULL;
    ctxt->atom = NULL;

    ctxt->depth++;
    xmlFAParseRegExp(ctxt, 0);
    ctxt->depth--;

    if (CUR == ')') {
        NEXT;
    } else {
        ERROR(" xmlFAParseAtom: expecting ')' ");
    }

    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
    if (ctxt->atom == NULL)
        return(-1);
    ctxt->atom->start = groupStart;
    ctxt->atom->start0 = groupStart0;
    ctxt->atom->stop = ctxt->state;
    ctxt->end = outerEnd;
    return(1);
}
/**
 * xmlFAParsePiece:
 * @ctxt:  a regexp parser context
 *
 * Parses an atom followed by an optional quantifier. ctxt->atom is
 * reset first; an error is raised if xmlFAParseAtom() reports
 * something other than "no atom" but left ctxt->atom NULL.
 *
 * [3]   piece   ::=   atom quantifier?
 *
 * Returns 0 if no atom starts at the cursor, 1 otherwise
 */
static int
xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
    ctxt->atom = NULL;
    if (xmlFAParseAtom(ctxt) == 0)
        return(0);

    if (ctxt->atom == NULL) {
        ERROR(" internal: no atom generated ");
    }
    (void) xmlFAParseQuantifier(ctxt);
    return(1);
}
/**
* xmlFAParseBranch :
2002-04-20 21:38:48 +04:00
* @ ctxt : a regexp parser context
2006-03-22 02:17:57 +03:00
* @ to : optional target to the end of the branch
*
* @ to is used to optimize by removing duplicate path in automata
* in expressions like ( a | b ) ( c | d )
2002-04-16 19:50:10 +04:00
*
* [ 2 ] branch : : = piece *
*/
2003-04-24 20:06:47 +04:00
static int
2006-03-22 02:17:57 +03:00
xmlFAParseBranch ( xmlRegParserCtxtPtr ctxt , xmlRegStatePtr to ) {
2002-04-16 19:50:10 +04:00
xmlRegStatePtr previous ;
int ret ;
previous = ctxt - > state ;
ret = xmlFAParsePiece ( ctxt ) ;
2019-09-25 14:57:42 +03:00
if ( ret = = 0 ) {
/* Empty branch */
xmlFAGenerateEpsilonTransition ( ctxt , previous , to ) ;
} else {
2012-09-11 09:26:36 +04:00
if ( xmlFAGenerateTransitions ( ctxt , previous ,
2023-02-17 17:20:33 +03:00
( CUR = = ' | ' | | CUR = = ' ) ' | | CUR = = 0 ) ? to : NULL ,
ctxt - > atom ) < 0 ) {
xmlRegFreeAtom ( ctxt - > atom ) ;
ctxt - > atom = NULL ;
2004-03-31 19:50:43 +04:00
return ( - 1 ) ;
2023-02-17 17:20:33 +03:00
}
2004-03-31 19:50:43 +04:00
previous = ctxt - > state ;
2002-04-16 19:50:10 +04:00
ctxt - > atom = NULL ;
}
while ( ( ret ! = 0 ) & & ( ctxt - > error = = 0 ) ) {
ret = xmlFAParsePiece ( ctxt ) ;
if ( ret ! = 0 ) {
2012-09-11 09:26:36 +04:00
if ( xmlFAGenerateTransitions ( ctxt , previous ,
2019-09-25 14:57:42 +03:00
( CUR = = ' | ' | | CUR = = ' ) ' | | CUR = = 0 ) ? to : NULL ,
2023-02-17 17:20:33 +03:00
ctxt - > atom ) < 0 ) {
xmlRegFreeAtom ( ctxt - > atom ) ;
ctxt - > atom = NULL ;
return ( - 1 ) ;
}
2002-04-16 19:50:10 +04:00
previous = ctxt - > state ;
ctxt - > atom = NULL ;
}
}
2003-04-24 20:06:47 +04:00
return ( 0 ) ;
2002-04-16 19:50:10 +04:00
}
/**
 * xmlFAParseRegExp:
 * @ctxt:  a regexp parser context
 * @top:  is this the top-level expression ?
 *
 * Parses a list of alternative branches. Every alternative restarts
 * from the state that was current on entry, and alternatives after the
 * first are parsed with the first branch's final state as target.
 *
 * [1]   regExp   ::=     branch  ( '|' branch )*
 */
static void
xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
    /* if not top, the entry state should have been generated by an
     * epsilon transition */
    xmlRegStatePtr entry = ctxt->state;
    xmlRegStatePtr join;

    ctxt->end = NULL;
    (void) xmlFAParseBranch(ctxt, NULL);
    if (top)
        ctxt->state->type = XML_REGEXP_FINAL_STATE;

    if (CUR != '|') {
        /* single branch */
        ctxt->end = ctxt->state;
        return;
    }

    join = ctxt->state;
    while ((CUR == '|') && (ctxt->error == 0)) {
        NEXT;
        ctxt->state = entry;
        ctxt->end = NULL;
        (void) xmlFAParseBranch(ctxt, join);
    }

    if (!top) {
        ctxt->state = join;
        ctxt->end = join;
    }
}
/************************************************************************
 *									*
 *			The basic API					*
 *									*
 ************************************************************************/
/**
 * xmlRegexpPrint:
 * @output: the file for the output debug
 * @regexp: the compiled regexp
 *
 * Print the content of the compiled regular expression: its source
 * string, then its atoms, states and counters. Nothing is printed if
 * @output is NULL. A regexp without a source string is printed with
 * "(null)" in place of the string.
 */
void
xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
    const char *source;
    int i;

    if (output == NULL)
        return;
    fprintf(output, " regexp: ");
    if (regexp == NULL) {
        fprintf(output, " NULL \n ");
        return;
    }

    /* passing a NULL pointer for %s is undefined behaviour */
    source = (regexp->string != NULL) ?
             (const char *) regexp->string : "(null)";
    fprintf(output, " '%s' ", source);
    fprintf(output, " \n ");

    fprintf(output, " %d atoms: \n ", regexp->nbAtoms);
    for (i = 0; i < regexp->nbAtoms; i++) {
        fprintf(output, " %02d ", i);
        xmlRegPrintAtom(output, regexp->atoms[i]);
    }

    fprintf(output, " %d states: ", regexp->nbStates);
    fprintf(output, " \n ");
    for (i = 0; i < regexp->nbStates; i++) {
        xmlRegPrintState(output, regexp->states[i]);
    }

    fprintf(output, " %d counters: \n ", regexp->nbCounters);
    for (i = 0; i < regexp->nbCounters; i++) {
        fprintf(output, " %d: min %d max %d \n ", i,
                regexp->counters[i].min, regexp->counters[i].max);
    }
}
/**
* xmlRegexpCompile :
* @ regexp : a regular expression string
*
* Parses a regular expression conforming to XML Schemas Part 2 Datatype
2004-05-06 08:17:26 +04:00
* Appendix F and builds an automata suitable for testing strings against
2002-04-16 19:50:10 +04:00
* that regular expression
*
* Returns the compiled expression or NULL in case of error
*/
xmlRegexpPtr
xmlRegexpCompile ( const xmlChar * regexp ) {
2023-02-17 17:23:42 +03:00
xmlRegexpPtr ret = NULL ;
2002-04-16 19:50:10 +04:00
xmlRegParserCtxtPtr ctxt ;
2023-09-22 16:25:40 +03:00
if ( regexp = = NULL )
return ( NULL ) ;
2002-04-16 19:50:10 +04:00
ctxt = xmlRegNewParserCtxt ( regexp ) ;
if ( ctxt = = NULL )
return ( NULL ) ;
/* initialize the parser */
2023-02-15 03:00:03 +03:00
ctxt - > state = xmlRegStatePush ( ctxt ) ;
if ( ctxt - > state = = NULL )
2023-02-17 17:23:42 +03:00
goto error ;
2023-02-15 03:00:03 +03:00
ctxt - > start = ctxt - > state ;
2002-04-16 19:50:10 +04:00
ctxt - > end = NULL ;
/* parse the expression building an automata */
xmlFAParseRegExp ( ctxt , 1 ) ;
if ( CUR ! = 0 ) {
ERROR ( " xmlFAParseRegExp: extra characters " ) ;
}
2023-02-17 17:23:42 +03:00
if ( ctxt - > error ! = 0 )
goto error ;
2002-04-16 19:50:10 +04:00
ctxt - > end = ctxt - > state ;
ctxt - > start - > type = XML_REGEXP_START_STATE ;
ctxt - > end - > type = XML_REGEXP_FINAL_STATE ;
/* remove the Epsilon except for counted transitions */
xmlFAEliminateEpsilonTransitions ( ctxt ) ;
2023-02-17 17:23:42 +03:00
if ( ctxt - > error ! = 0 )
goto error ;
2002-04-16 19:50:10 +04:00
ret = xmlRegEpxFromParse ( ctxt ) ;
2023-02-17 17:23:42 +03:00
error :
2002-04-16 19:50:10 +04:00
xmlRegFreeParserCtxt ( ctxt ) ;
return ( ret ) ;
}
/**
 * xmlRegexpExec:
 * @comp:  the compiled regular expression
 * @content:  the value to check against the regular expression
 *
 * Check if the regular expression generates the value
 *
 * Returns 1 if it matches, 0 if not and a negative value in case of
 *         error, including a NULL @comp or @content
 */
int
xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
    if ((comp != NULL) && (content != NULL))
        return(xmlFARegExec(comp, content));
    return(-1);
}
2002-09-19 23:56:43 +04:00
/**
 * xmlRegexpIsDeterminist:
 * @comp:  the compiled regular expression
 *
 * Check if the regular expression is determinist. The result is
 * stored in @comp->determinist and returned directly by later calls.
 *
 * Returns 1 if it is, 0 if not and a negative value in case of error
 */
int
xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
    xmlAutomataPtr scratch;
    int verdict;

    if (comp == NULL)
        return(-1);
    /* anything other than -1 is an answer computed earlier */
    if (comp->determinist != -1)
        return(comp->determinist);

    scratch = xmlNewAutomata();
    if (scratch == NULL)
        return(-1);

    /* release the states the new automata came with */
    if (scratch->states != NULL) {
        int idx;

        for (idx = 0; idx < scratch->nbStates; idx++)
            xmlRegFreeState(scratch->states[idx]);
        xmlFree(scratch->states);
    }

    /* lend the atoms and states of @comp to the scratch automata */
    scratch->nbAtoms = comp->nbAtoms;
    scratch->atoms = comp->atoms;
    scratch->nbStates = comp->nbStates;
    scratch->states = comp->states;
    scratch->determinist = -1;
    scratch->flags = comp->flags;

    verdict = xmlFAComputesDeterminism(scratch);

    /* take the borrowed arrays back before the automata is freed */
    scratch->atoms = NULL;
    scratch->states = NULL;
    xmlFreeAutomata(scratch);

    comp->determinist = verdict;
    return(verdict);
}
2002-04-16 19:50:10 +04:00
/**
 * xmlRegFreeRegexp:
 * @regexp:  the regexp
 *
 * Free a regexp together with its source string, states, atoms,
 * counters, compact table, transition data and string map.
 * A NULL @regexp is ignored.
 */
void
xmlRegFreeRegexp(xmlRegexpPtr regexp) {
    int idx;

    if (regexp == NULL)
        return;

    if (regexp->string != NULL)
        xmlFree(regexp->string);

    if (regexp->states != NULL) {
        for (idx = 0; idx < regexp->nbStates; idx++)
            xmlRegFreeState(regexp->states[idx]);
        xmlFree(regexp->states);
    }

    if (regexp->atoms != NULL) {
        for (idx = 0; idx < regexp->nbAtoms; idx++)
            xmlRegFreeAtom(regexp->atoms[idx]);
        xmlFree(regexp->atoms);
    }

    if (regexp->counters != NULL)
        xmlFree(regexp->counters);
    if (regexp->compact != NULL)
        xmlFree(regexp->compact);
    if (regexp->transdata != NULL)
        xmlFree(regexp->transdata);

    if (regexp->stringMap != NULL) {
        for (idx = 0; idx < regexp->nbstrings; idx++)
            xmlFree(regexp->stringMap[idx]);
        xmlFree(regexp->stringMap);
    }

    xmlFree(regexp);
}
/************************************************************************
 *									*
 *			The Automata interface				*
 *									*
 ************************************************************************/
/**
* xmlNewAutomata :
*
* Create a new automata
*
* Returns the new object or NULL in case of failure
*/
xmlAutomataPtr
xmlNewAutomata ( void ) {
xmlAutomataPtr ctxt ;
ctxt = xmlRegNewParserCtxt ( NULL ) ;
if ( ctxt = = NULL )
return ( NULL ) ;
/* initialize the parser */
2023-02-15 03:00:03 +03:00
ctxt - > state = xmlRegStatePush ( ctxt ) ;
if ( ctxt - > state = = NULL ) {
2003-04-24 20:06:47 +04:00
xmlFreeAutomata ( ctxt ) ;
return ( NULL ) ;
}
2023-02-15 03:00:03 +03:00
ctxt - > start = ctxt - > state ;
ctxt - > end = NULL ;
2006-01-02 13:22:02 +03:00
ctxt - > start - > type = XML_REGEXP_START_STATE ;
2009-08-31 18:47:39 +04:00
ctxt - > flags = 0 ;
2002-04-16 19:50:10 +04:00
return ( ctxt ) ;
}
/**
 * xmlFreeAutomata:
 * @am: an automata
 *
 * Free an automata. A NULL @am is ignored.
 */
void
xmlFreeAutomata(xmlAutomataPtr am) {
    if (am != NULL)
        xmlRegFreeParserCtxt(am);
}
2009-08-31 18:47:39 +04:00
/**
 * xmlAutomataSetFlags:
 * @am: an automata
 * @flags:  a set of internal flags
 *
 * Set some flags on the automata. The bits of @flags are added to the
 * flags already set; none is cleared. A NULL @am is ignored.
 */
void
xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
    if (am != NULL)
        am->flags |= flags;
}
2002-04-16 19:50:10 +04:00
/**
 * xmlAutomataGetInitState:
 * @am: an automata
 *
 * Initial state lookup
 *
 * Returns the initial state of the automata, or NULL if @am is NULL
 */
xmlAutomataStatePtr
xmlAutomataGetInitState(xmlAutomataPtr am) {
    return((am != NULL) ? am->start : NULL);
}
/**
 * xmlAutomataSetFinalState:
 * @am: an automata
 * @state: a state in this automata
 *
 * Makes that state a final state
 *
 * Returns 0, or -1 if @am or @state is NULL
 */
int
xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
    if ((am != NULL) && (state != NULL)) {
        state->type = XML_REGEXP_FINAL_STATE;
        return(0);
    }
    return(-1);
}
/**
 * xmlAutomataNewTransition:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @token: the input string associated to that transition
 * @data: data passed to the callback function if the transition is activated
 *
 * If @to is NULL, this creates first a new target state in the automata
 * and then adds a transition from the @from state to the target state
 * activated by the value of @token
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
                         xmlAutomataStatePtr to, const xmlChar *token,
                         void *data) {
    xmlRegAtomPtr atom;

    if ((am == NULL) || (from == NULL) || (token == NULL))
        return(NULL);

    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
    if (atom == NULL)
        return(NULL);
    atom->data = data;

    /* the atom keeps its own copy of the token */
    atom->valuep = xmlStrdup(token);
    if (atom->valuep == NULL) {
        xmlRegFreeAtom(atom);
        xmlRegexpErrMemory(am);
        return(NULL);
    }

    if (xmlFAGenerateTransitions(am, from, to, atom) >= 0) {
        /* without an explicit target, report the automata's current state */
        return((to != NULL) ? to : am->state);
    }

    xmlRegFreeAtom(atom);
    return(NULL);
}
/**
 * xmlAutomataNewTransition2:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @token: the first input string associated to that transition
 * @token2: the second input string associated to that transition
 * @data: data passed to the callback function if the transition is activated
 *
 * If @to is NULL, this creates first a new target state in the automata
 * and then adds a transition from the @from state to the target state
 * activated by the value of @token. When @token2 is neither NULL nor
 * empty, the atom value is @token and @token2 joined by a '|'.
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
                          xmlAutomataStatePtr to, const xmlChar *token,
                          const xmlChar *token2, void *data) {
    xmlRegAtomPtr atom;

    if ((am == NULL) || (from == NULL) || (token == NULL))
        return(NULL);
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
    if (atom == NULL)
        return(NULL);
    atom->data = data;

    if ((token2 == NULL) || (*token2 == 0)) {
        atom->valuep = xmlStrdup(token);
    } else {
        /* size_t avoids truncating the lengths of very long tokens */
        size_t lenn = strlen((const char *) token2);
        size_t lenp = strlen((const char *) token);
        xmlChar *str;

        str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
        if (str != NULL) {
            memcpy(&str[0], token, lenp);
            str[lenp] = '|';
            memcpy(&str[lenp + 1], token2, lenn);
            str[lenn + lenp + 1] = 0;
        }
        atom->valuep = str;
    }

    /* both branches end up here with a NULL value on allocation failure */
    if (atom->valuep == NULL) {
        xmlRegFreeAtom(atom);
        xmlRegexpErrMemory(am);
        return(NULL);
    }

    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
        xmlRegFreeAtom(atom);
        return(NULL);
    }
    if (to == NULL)
        return(am->state);
    return(to);
}
/**
 * xmlAutomataNewNegTrans:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @token: the first input string associated to that transition
 * @token2: the second input string associated to that transition
 * @data: data passed to the callback function if the transition is activated
 *
 * If @to is NULL, this creates first a new target state in the automata
 * and then adds a transition from the @from state to the target state
 * activated by any value except (@token,@token2)
 * Note that if @token2 is not NULL, then (X, NULL) won't match to follow
 * the semantic of XSD ##other
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
                       xmlAutomataStatePtr to, const xmlChar *token,
                       const xmlChar *token2, void *data) {
    xmlRegAtomPtr atom;
    xmlChar err_msg[200];

    if ((am == NULL) || (from == NULL) || (token == NULL))
        return(NULL);
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
    if (atom == NULL)
        return(NULL);
    atom->data = data;
    atom->neg = 1;

    if ((token2 == NULL) || (*token2 == 0)) {
        atom->valuep = xmlStrdup(token);
    } else {
        /* size_t avoids truncating the lengths of very long tokens */
        size_t lenn = strlen((const char *) token2);
        size_t lenp = strlen((const char *) token);
        xmlChar *str;

        str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
        if (str != NULL) {
            memcpy(&str[0], token, lenp);
            str[lenp] = '|';
            memcpy(&str[lenp + 1], token2, lenn);
            str[lenn + lenp + 1] = 0;
        }
        atom->valuep = str;
    }
    /* must be checked before the value is formatted with %s below */
    if (atom->valuep == NULL)
        goto mem_error;

    /* second value: the atom value prefixed with "not", cut to fit */
    snprintf((char *) err_msg, 199, " not %s ", (const char *) atom->valuep);
    err_msg[199] = 0;
    atom->valuep2 = xmlStrdup(err_msg);
    if (atom->valuep2 == NULL)
        goto mem_error;

    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
        xmlRegFreeAtom(atom);
        return(NULL);
    }
    am->negs++;
    if (to == NULL)
        return(am->state);
    return(to);

mem_error:
    xmlRegFreeAtom(atom);
    xmlRegexpErrMemory(am);
    return(NULL);
}
2004-09-29 17:29:03 +04:00
/**
 * xmlAutomataNewCountTrans2:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @token: the input string associated to that transition
 * @token2: the second input string associated to that transition
 * @min:  the minimum successive occurrences of token
 * @max:  the maximum successive occurrences of token
 * @data:  data associated to the transition
 *
 * If @to is NULL, this creates first a new target state in the automata
 * and then adds a transition from the @from state to the target state
 * activated by a succession of input of value @token and @token2 and
 * whose number is between @min and @max
 *
 * @min must not be negative and @max must be at least 1 and at least
 * @min. On success the target state becomes the automata's current
 * state.
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
                          xmlAutomataStatePtr to, const xmlChar *token,
                          const xmlChar *token2,
                          int min, int max, void *data) {
    xmlRegAtomPtr atom;
    int counter;

    if ((am == NULL) || (from == NULL) || (token == NULL))
        return(NULL);
    if (min < 0)
        return(NULL);
    if ((max < min) || (max < 1))
        return(NULL);
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
    if (atom == NULL)
        return(NULL);

    if ((token2 == NULL) || (*token2 == 0)) {
        atom->valuep = xmlStrdup(token);
        if (atom->valuep == NULL)
            goto error;
    } else {
        /* size_t avoids truncating the lengths of very long tokens */
        size_t lenn = strlen((const char *) token2);
        size_t lenp = strlen((const char *) token);
        xmlChar *str;

        str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
        if (str == NULL)
            goto error;
        memcpy(&str[0], token, lenp);
        str[lenp] = '|';
        memcpy(&str[lenp + 1], token2, lenn);
        str[lenn + lenp + 1] = 0;
        atom->valuep = str;
    }
    atom->data = data;
    /* the atom is given a minimum of 1 even when @min is 0 */
    atom->min = (min == 0) ? 1 : min;
    atom->max = max;

    /*
     * associate a counter to the transition.
     */
    counter = xmlRegGetCounter(am);
    if (counter < 0)
        goto error;
    am->counters[counter].min = min;
    am->counters[counter].max = max;

    if (to == NULL) {
        to = xmlRegStatePush(am);
        if (to == NULL)
            goto error;
    }
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
    /*
     * NOTE(review): if the push below fails, the atom is freed although
     * it was just handed to xmlRegStateAddTrans() for @from - confirm
     * that callers discard the automata after a NULL return.
     */
    if (xmlRegAtomPush(am, atom) < 0)
        goto error;
    am->state = to;

    /* @to is known to be non-NULL from here on */
    if (min == 0)
        xmlFAGenerateEpsilonTransition(am, from, to);
    return(to);

error:
    xmlRegFreeAtom(atom);
    return(NULL);
}
2002-04-16 19:50:10 +04:00
/**
 * xmlAutomataNewCountTrans:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @token: the input string associated to that transition
 * @min:  the minimum successive occurrences of token
 * @max:  the maximum successive occurrences of token
 * @data:  data associated to the transition
 *
 * If @to is NULL, this creates first a new target state in the automata
 * and then adds a transition from the @from state to the target state
 * activated by a succession of input of value @token and whose number
 * is between @min and @max
 *
 * @min must not be negative and @max must be at least 1 and at least
 * @min. On success the target state becomes the automata's current
 * state.
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
                         xmlAutomataStatePtr to, const xmlChar *token,
                         int min, int max, void *data) {
    xmlRegAtomPtr atom;
    int counter;

    if ((am == NULL) || (from == NULL) || (token == NULL))
        return(NULL);
    if (min < 0)
        return(NULL);
    if ((max < min) || (max < 1))
        return(NULL);
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
    if (atom == NULL)
        return(NULL);
    atom->valuep = xmlStrdup(token);
    if (atom->valuep == NULL)
        goto error;
    atom->data = data;
    /* the atom is given a minimum of 1 even when @min is 0 */
    atom->min = (min == 0) ? 1 : min;
    atom->max = max;

    /*
     * associate a counter to the transition.
     */
    counter = xmlRegGetCounter(am);
    if (counter < 0)
        goto error;
    am->counters[counter].min = min;
    am->counters[counter].max = max;

    if (to == NULL) {
        to = xmlRegStatePush(am);
        if (to == NULL)
            goto error;
    }
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
    /*
     * NOTE(review): if the push below fails, the atom is freed although
     * it was just handed to xmlRegStateAddTrans() for @from - confirm
     * that callers discard the automata after a NULL return.
     */
    if (xmlRegAtomPush(am, atom) < 0)
        goto error;
    am->state = to;

    /* @to is known to be non-NULL from here on */
    if (min == 0)
        xmlFAGenerateEpsilonTransition(am, from, to);
    return(to);

error:
    xmlRegFreeAtom(atom);
    return(NULL);
}
2004-09-29 17:29:03 +04:00
/**
 * xmlAutomataNewOnceTrans2:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @token: the input string associated to that transition
 * @token2: the second input string associated to that transition
 * @min:  the minimum successive occurrences of token
 * @max:  the maximum successive occurrences of token
 * @data:  data associated to the transition
 *
 * If @to is NULL, this creates first a new target state in the automata
 * and then adds a transition from the @from state to the target state
 * activated by a succession of input of value @token and @token2 and whose
 * number is between @min and @max, moreover that transition can only be
 * crossed once.
 *
 * When @token2 is NULL or empty the atom value is a copy of @token,
 * otherwise it is the string "@token|@token2".
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
                         xmlAutomataStatePtr to, const xmlChar *token,
                         const xmlChar *token2,
                         int min, int max, void *data) {
    xmlRegAtomPtr atom;
    int counter;

    if ((am == NULL) || (from == NULL) || (token == NULL))
        return(NULL);
    if (min < 1)
        return(NULL);
    if (max < min)
        return(NULL);
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
    if (atom == NULL)
        return(NULL);
    if ((token2 == NULL) || (*token2 == 0)) {
        atom->valuep = xmlStrdup(token);
        if (atom->valuep == NULL)
            goto error;
    } else {
        /*
         * Lengths are kept as size_t: storing strlen() results in an int
         * could truncate or overflow the allocation size computed below.
         */
        size_t lenn, lenp;
        xmlChar *str;

        lenn = strlen((const char *) token2);
        lenp = strlen((const char *) token);

        /* room for both tokens, the separator and the terminating 0 */
        str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
        if (str == NULL)
            goto error;
        memcpy(&str[0], token, lenp);
        str[lenp] = XML_REG_STRING_SEPARATOR;
        memcpy(&str[lenp + 1], token2, lenn);
        str[lenn + lenp + 1] = 0;

        atom->valuep = str;
    }
    atom->data = data;
    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
    atom->min = min;
    atom->max = max;
    /*
     * associate a counter to the transition.
     */
    counter = xmlRegGetCounter(am);
    if (counter < 0)
        goto error;
    am->counters[counter].min = 1;
    am->counters[counter].max = 1;

    /* xmlFAGenerateTransitions(am, from, to, atom); */
    if (to == NULL) {
        to = xmlRegStatePush(am);
        if (to == NULL)
            goto error;
    }
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
    if (xmlRegAtomPush(am, atom) < 0)
        goto error;
    am->state = to;
    return(to);

error:
    xmlRegFreeAtom(atom);
    return(NULL);
}
2012-09-11 09:26:36 +04:00
2004-09-29 17:29:03 +04:00
2002-04-20 10:41:40 +04:00
/**
 * xmlAutomataNewOnceTrans:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @token: the input string associated to that transition
 * @min:  the minimum successive occurrences of token
 * @max:  the maximum successive occurrences of token
 * @data:  data associated to the transition
 *
 * If @to is NULL, this creates first a new target state in the automata
 * and then adds a transition from the @from state to the target state
 * activated by a succession of input of value @token and whose number
 * is between @min and @max, moreover that transition can only be crossed
 * once.
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
                        xmlAutomataStatePtr to, const xmlChar *token,
                        int min, int max, void *data) {
    xmlRegAtomPtr atom;
    int counter;

    if ((am == NULL) || (from == NULL) || (token == NULL))
        return(NULL);
    if (min < 1)
        return(NULL);
    if (max < min)
        return(NULL);
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
    if (atom == NULL)
        return(NULL);
    atom->valuep = xmlStrdup(token);
    /* a failed copy must not leave an atom without value in the automata */
    if (atom->valuep == NULL)
        goto error;
    atom->data = data;
    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
    atom->min = min;
    atom->max = max;
    /*
     * associate a counter to the transition.
     */
    counter = xmlRegGetCounter(am);
    if (counter < 0)
        goto error;
    am->counters[counter].min = 1;
    am->counters[counter].max = 1;

    /* xmlFAGenerateTransitions(am, from, to, atom); */
    if (to == NULL) {
        to = xmlRegStatePush(am);
        if (to == NULL)
            goto error;
    }
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
    if (xmlRegAtomPush(am, atom) < 0)
        goto error;
    am->state = to;
    return(to);

error:
    xmlRegFreeAtom(atom);
    return(NULL);
}
2002-04-16 19:50:10 +04:00
/**
 * xmlAutomataNewState:
 * @am: an automata
 *
 * Create a new disconnected state in the automata
 *
 * Returns the new state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewState(xmlAutomataPtr am) {
    return((am != NULL) ? xmlRegStatePush(am) : NULL);
}
/**
 * xmlAutomataNewEpsilon:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 *
 * If @to is NULL, this creates first a new target state in the automata
 * and then adds an epsilon transition from the @from state to the
 * target state
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
                      xmlAutomataStatePtr to) {
    if ((am == NULL) || (from == NULL))
        return(NULL);

    xmlFAGenerateEpsilonTransition(am, from, to);

    /* without an explicit target, report the automata's current state */
    return((to != NULL) ? to : am->state);
}
2002-04-20 10:41:40 +04:00
/**
 * xmlAutomataNewAllTrans:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @lax: allow to transition if not all transitions have been activated
 *
 * If @to is NULL, this creates first a new target state in the automata
 * and then adds an ALL transition from the @from state to the
 * target state. That transition is an epsilon transition allowed only when
 * all transitions from the @from node have been activated.
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
                       xmlAutomataStatePtr to, int lax) {
    if ((am == NULL) || (from == NULL))
        return(NULL);

    xmlFAGenerateAllTransition(am, from, to, lax);

    /* without an explicit target, report the automata's current state */
    return((to != NULL) ? to : am->state);
}
2002-04-17 20:28:10 +04:00
/**
* xmlAutomataNewCounter :
* @ am : an automata
* @ min : the minimal value on the counter
* @ max : the maximal value on the counter
*
* Create a new counter
*
* Returns the counter number or - 1 in case of error
*/
2012-09-11 09:26:36 +04:00
int
2002-04-17 20:28:10 +04:00
xmlAutomataNewCounter ( xmlAutomataPtr am , int min , int max ) {
int ret ;
if ( am = = NULL )
return ( - 1 ) ;
ret = xmlRegGetCounter ( am ) ;
if ( ret < 0 )
return ( - 1 ) ;
am - > counters [ ret ] . min = min ;
am - > counters [ ret ] . max = max ;
return ( ret ) ;
}
/**
 * xmlAutomataNewCountedTrans:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @counter: the counter associated to that transition
 *
 * If @to is NULL, this creates first a new target state in the automata
 * and then adds an epsilon transition from the @from state to the target state
 * which will increment the counter provided
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
                           xmlAutomataStatePtr to, int counter) {
    if ((am == NULL) || (from == NULL) || (counter < 0))
        return(NULL);

    xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);

    /* without an explicit target, report the automata's current state */
    return((to != NULL) ? to : am->state);
}
/**
 * xmlAutomataNewCounterTrans:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @counter: the counter associated to that transition
 *
 * If @to is NULL, this creates first a new target state in the automata
 * and then adds an epsilon transition from the @from state to the target state
 * which will be allowed only if the counter is within the right range.
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
                           xmlAutomataStatePtr to, int counter) {
    if ((am == NULL) || (from == NULL) || (counter < 0))
        return(NULL);

    xmlFAGenerateCountedTransition(am, from, to, counter);

    /* without an explicit target, report the automata's current state */
    return((to != NULL) ? to : am->state);
}
2002-04-16 19:50:10 +04:00
/**
* xmlAutomataCompile :
* @ am : an automata
*
* Compile the automata into a Reg Exp ready for being executed .
* The automata should be free after this point .
*
* Returns the compiled regexp or NULL in case of error
*/
2012-09-11 09:26:36 +04:00
xmlRegexpPtr
2002-04-16 19:50:10 +04:00
xmlAutomataCompile ( xmlAutomataPtr am ) {
xmlRegexpPtr ret ;
2003-04-24 20:06:47 +04:00
if ( ( am = = NULL ) | | ( am - > error ! = 0 ) ) return ( NULL ) ;
2002-04-16 19:50:10 +04:00
xmlFAEliminateEpsilonTransitions ( am ) ;
2023-12-10 19:09:45 +03:00
if ( am - > error ! = 0 )
return ( NULL ) ;
2002-09-19 23:56:43 +04:00
/* xmlFAComputesDeterminism(am); */
2002-04-16 19:50:10 +04:00
ret = xmlRegEpxFromParse ( am ) ;
return ( ret ) ;
}
2002-04-22 20:01:24 +04:00
/**
* xmlAutomataIsDeterminist :
* @ am : an automata
*
* Checks if an automata is determinist .
*
* Returns 1 if true , 0 if not , and - 1 in case of error
*/
2012-09-11 09:26:36 +04:00
int
2002-04-22 20:01:24 +04:00
xmlAutomataIsDeterminist ( xmlAutomataPtr am ) {
int ret ;
if ( am = = NULL )
return ( - 1 ) ;
ret = xmlFAComputesDeterminism ( am ) ;
return ( ret ) ;
}
2005-08-22 04:20:58 +04:00
# ifdef LIBXML_EXPR_ENABLED
2024-06-16 01:11:41 +03:00
/** DOC_DISABLE */
2005-08-22 04:20:58 +04:00
/************************************************************************
* *
* Formal Expression handling code *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2024-06-16 01:11:41 +03:00
/*
 * Formal regular expression handling
 * Its goal is to do some formal work on content models
 */

/* expressions are used within a context */
typedef struct _xmlExpCtxt xmlExpCtxt;
typedef xmlExpCtxt *xmlExpCtxtPtr;

/* context life cycle */
XMLPUBFUN void
        xmlExpFreeCtxt          (xmlExpCtxtPtr ctxt);
XMLPUBFUN xmlExpCtxtPtr
        xmlExpNewCtxt           (int maxNodes,
                                 xmlDictPtr dict);

/* context statistics */
XMLPUBFUN int
        xmlExpCtxtNbNodes       (xmlExpCtxtPtr ctxt);
XMLPUBFUN int
        xmlExpCtxtNbCons        (xmlExpCtxtPtr ctxt);

/* Expressions are trees but the tree is opaque */
typedef struct _xmlExpNode xmlExpNode;
typedef xmlExpNode *xmlExpNodePtr;

/* the kinds of expression nodes */
typedef enum {
    XML_EXP_EMPTY = 0,
    XML_EXP_FORBID = 1,
    XML_EXP_ATOM = 2,
    XML_EXP_SEQ = 3,
    XML_EXP_OR = 4,
    XML_EXP_COUNT = 5
} xmlExpNodeType;

/*
 * 2 core expressions shared by all for the empty language set
 * and for the set with just the empty token
 */
XMLPUBVAR xmlExpNodePtr forbiddenExp;
XMLPUBVAR xmlExpNodePtr emptyExp;

/*
 * Expressions are reference counted internally
 */
XMLPUBFUN void
        xmlExpFree              (xmlExpCtxtPtr ctxt,
                                 xmlExpNodePtr expr);
XMLPUBFUN void
        xmlExpRef               (xmlExpNodePtr expr);

/*
 * constructors can be either manual or from a string
 */
XMLPUBFUN xmlExpNodePtr
        xmlExpParse             (xmlExpCtxtPtr ctxt,
                                 const char *expr);
XMLPUBFUN xmlExpNodePtr
        xmlExpNewAtom           (xmlExpCtxtPtr ctxt,
                                 const xmlChar *name,
                                 int len);
XMLPUBFUN xmlExpNodePtr
        xmlExpNewOr             (xmlExpCtxtPtr ctxt,
                                 xmlExpNodePtr left,
                                 xmlExpNodePtr right);
XMLPUBFUN xmlExpNodePtr
        xmlExpNewSeq            (xmlExpCtxtPtr ctxt,
                                 xmlExpNodePtr left,
                                 xmlExpNodePtr right);
XMLPUBFUN xmlExpNodePtr
        xmlExpNewRange          (xmlExpCtxtPtr ctxt,
                                 xmlExpNodePtr subset,
                                 int min,
                                 int max);

/*
 * The really interesting APIs
 */
XMLPUBFUN int
        xmlExpIsNillable        (xmlExpNodePtr expr);
XMLPUBFUN int
        xmlExpMaxToken          (xmlExpNodePtr expr);
XMLPUBFUN int
        xmlExpGetLanguage       (xmlExpCtxtPtr ctxt,
                                 xmlExpNodePtr expr,
                                 const xmlChar **langList,
                                 int len);
XMLPUBFUN int
        xmlExpGetStart          (xmlExpCtxtPtr ctxt,
                                 xmlExpNodePtr expr,
                                 const xmlChar **tokList,
                                 int len);
XMLPUBFUN xmlExpNodePtr
        xmlExpStringDerive      (xmlExpCtxtPtr ctxt,
                                 xmlExpNodePtr expr,
                                 const xmlChar *str,
                                 int len);
XMLPUBFUN xmlExpNodePtr
        xmlExpExpDerive         (xmlExpCtxtPtr ctxt,
                                 xmlExpNodePtr expr,
                                 xmlExpNodePtr sub);
XMLPUBFUN int
        xmlExpSubsume           (xmlExpCtxtPtr ctxt,
                                 xmlExpNodePtr expr,
                                 xmlExpNodePtr sub);
XMLPUBFUN void
        xmlExpDump              (xmlBufferPtr buf,
                                 xmlExpNodePtr expr);
2005-08-22 04:20:58 +04:00
/************************************************************************
* *
* Expression handling context *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
struct _xmlExpCtxt {
    xmlDictPtr dict;        /* dictionary used to intern the atom names */
    xmlExpNodePtr *table;   /* hash table buckets, nodes chained by ->next */
    int size;               /* number of buckets in @table */
    int nbElems;            /* incremented for each node inserted in @table */
    int nb_nodes;           /* number of nodes currently allocated */
    int maxNodes;           /* limit requested at creation (at least 4096) */
    const char *expr;       /* presumably the string being parsed - confirm */
    const char *cur;        /* presumably the parsing position - confirm */
    int nb_cons;            /* total number of nodes created so far */
    int tabSize;            /* NOTE(review): not used in this part of the file */
};
/**
 * xmlExpNewCtxt:
 * @maxNodes:  the maximum number of nodes
 * @dict:  optional dictionary to use internally
 *
 * Creates a new context for manipulating expressions. Values of
 * @maxNodes below 4096 are raised to 4096. When @dict is provided an
 * extra reference is taken on it, otherwise a new dictionary is created.
 *
 * Returns the context or NULL in case of error
 */
xmlExpCtxtPtr
xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
    int nbuckets = 256;
    xmlExpCtxtPtr ctxt;

    if (maxNodes <= 4096)
        maxNodes = 4096;

    ctxt = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
    if (ctxt == NULL)
        return(NULL);
    memset(ctxt, 0, sizeof(xmlExpCtxt));
    ctxt->size = nbuckets;
    ctxt->nbElems = 0;
    ctxt->maxNodes = maxNodes;

    /* the hash table starts with all buckets empty */
    ctxt->table = xmlMalloc(nbuckets * sizeof(xmlExpNodePtr));
    if (ctxt->table == NULL) {
        xmlFree(ctxt);
        return(NULL);
    }
    memset(ctxt->table, 0, nbuckets * sizeof(xmlExpNodePtr));

    if (dict != NULL) {
        ctxt->dict = dict;
        xmlDictReference(ctxt->dict);
    } else {
        ctxt->dict = xmlDictCreate();
        if (ctxt->dict == NULL) {
            xmlFree(ctxt->table);
            xmlFree(ctxt);
            return(NULL);
        }
    }
    return(ctxt);
}
/**
 * xmlExpFreeCtxt:
 * @ctxt:  an expression context
 *
 * Free an expression context: releases the reference held on the
 * dictionary, then the hash table array and the context itself.
 */
void
xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
    if (ctxt != NULL) {
        xmlDictFree(ctxt->dict);
        if (ctxt->table != NULL)
            xmlFree(ctxt->table);
        xmlFree(ctxt);
    }
}
/************************************************************************
* *
* Structure associated to an expression node *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-08-22 16:07:04 +04:00
# define MAX_NODES 10000
/*
2012-09-11 09:26:36 +04:00
* TODO :
2005-08-22 16:07:04 +04:00
* - Wildcards
* - public API for creation
*
* Started
* - regression testing
*
* Done
* - split into module and test tool
* - memleaks
*/
2005-08-22 04:20:58 +04:00
typedef enum {
XML_EXP_NILABLE = ( 1 < < 0 )
} xmlExpNodeInfo ;
# define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
struct _xmlExpNode {
unsigned char type ; /* xmlExpNodeType */
unsigned char info ; /* OR of xmlExpNodeInfo */
unsigned short key ; /* the hash key */
unsigned int ref ; /* The number of references */
int c_max ; /* the maximum length it can consume */
xmlExpNodePtr exp_left ;
xmlExpNodePtr next ; /* the next node in the hash table or free list */
union {
struct {
int f_min ;
int f_max ;
} count ;
struct {
xmlExpNodePtr f_right ;
} children ;
const xmlChar * f_str ;
} field ;
} ;
# define exp_min field.count.f_min
# define exp_max field.count.f_max
/* #define exp_left field.children.f_left */
# define exp_right field.children.f_right
# define exp_str field.f_str
static xmlExpNodePtr xmlExpNewNode ( xmlExpCtxtPtr ctxt , xmlExpNodeType type ) ;
static xmlExpNode forbiddenExpNode = {
XML_EXP_FORBID , 0 , 0 , 0 , 0 , NULL , NULL , { { 0 , 0 } }
} ;
xmlExpNodePtr forbiddenExp = & forbiddenExpNode ;
static xmlExpNode emptyExpNode = {
XML_EXP_EMPTY , 1 , 0 , 0 , 0 , NULL , NULL , { { 0 , 0 } }
} ;
xmlExpNodePtr emptyExp = & emptyExpNode ;
/************************************************************************
* *
* The custom hash table for unicity and canonicalization *
* of sub - expressions pointers *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* xmlExpHashNameComputeKey :
* Calculate the hash key for a token
*/
static unsigned short
xmlExpHashNameComputeKey ( const xmlChar * name ) {
unsigned short value = 0L ;
char ch ;
2012-09-11 09:26:36 +04:00
2005-08-22 04:20:58 +04:00
if ( name ! = NULL ) {
value + = 30 * ( * name ) ;
while ( ( ch = * name + + ) ! = 0 ) {
value = value ^ ( ( value < < 5 ) + ( value > > 3 ) + ( unsigned long ) ch ) ;
}
}
return ( value ) ;
}
/*
 * xmlExpHashComputeKey:
 * Calculate the hash key for a compound expression from the keys of its
 * operands. SEQ and OR need both @left and @right; for COUNT @right may
 * be NULL since such nodes only have a left operand. Any other type
 * yields 0.
 */
static unsigned short
xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
                     xmlExpNodePtr right) {
    unsigned long value;
    unsigned short ret;

    switch (type) {
        case XML_EXP_SEQ:
            value = left->key;
            value += right->key;
            value *= 3;
            ret = (unsigned short) value;
            break;
        case XML_EXP_OR:
            value = left->key;
            value += right->key;
            value *= 7;
            ret = (unsigned short) value;
            break;
        case XML_EXP_COUNT:
            value = left->key;
            /* COUNT nodes are built without a right operand */
            if (right != NULL)
                value += right->key;
            ret = (unsigned short) value;
            break;
        default:
            ret = 0;
    }
    return(ret);
}
/*
 * xmlExpNewNode:
 * Allocate a zeroed node of the given type and update the context
 * counters. Returns NULL if MAX_NODES nodes are already alive or if the
 * allocation fails.
 */
static xmlExpNodePtr
xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
    xmlExpNodePtr node;

    if (ctxt->nb_nodes >= MAX_NODES)
        return(NULL);

    node = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
    if (node != NULL) {
        memset(node, 0, sizeof(xmlExpNode));
        node->type = type;
        node->next = NULL;
        ctxt->nb_nodes++;
        ctxt->nb_cons++;
    }
    return(node);
}
/**
 * xmlExpHashGetEntry:
 * @ctxt: the expression context owning the hash table
 * @type: the type of node requested
 * @left: the left operand (SEQ, OR) or the operand (COUNT)
 * @right: the right operand (SEQ, OR)
 * @name: the atom name (ATOM), compared by pointer
 * @min: the lower bound (COUNT)
 * @max: the upper bound (COUNT), -1 for unbounded
 *
 * Get the unique entry from the hash table. The entry is created if
 * needed. @left and @right are consumed, i.e. their ref count will
 * be decremented by the operation. This also holds when the creation
 * of a node fails.
 *
 * Before the lookup a few reduction and canonicalization rules are
 * applied, so the node returned may be one of the operands, one of the
 * shared emptyExp/forbiddenExp nodes, or a re-balanced OR tree.
 *
 * Returns the pointer or NULL in case of error
 */
static xmlExpNodePtr
xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
                   xmlExpNodePtr left, xmlExpNodePtr right,
                   const xmlChar *name, int min, int max) {
    unsigned short kbase, key;
    xmlExpNodePtr entry;
    xmlExpNodePtr insert;

    if (ctxt == NULL)
        return(NULL);

    /*
     * Check for duplicate and insertion location.
     */
    if (type == XML_EXP_ATOM) {
        kbase = xmlExpHashNameComputeKey(name);
    } else if (type == XML_EXP_COUNT) {
        /* COUNT reduction rule 1 */
        /* a{1} -> a */
        if (min == max) {
            if (min == 1) {
                return(left);
            }
            if (min == 0) {
                xmlExpFree(ctxt, left);
                return(emptyExp);
            }
        }
        if (min < 0) {
            xmlExpFree(ctxt, left);
            return(forbiddenExp);
        }
        if (max == -1)
            kbase = min + 79;
        else
            kbase = max - min;
        kbase += left->key;
    } else if (type == XML_EXP_OR) {
        /* Forbid reduction rules */
        if (left->type == XML_EXP_FORBID) {
            xmlExpFree(ctxt, left);
            return(right);
        }
        if (right->type == XML_EXP_FORBID) {
            xmlExpFree(ctxt, right);
            return(left);
        }

        /* OR reduction rule 1 */
        /* a | a reduced to a */
        if (left == right) {
            xmlExpFree(ctxt, right);
            return(left);
        }
        /* OR canonicalization rule 1 */
        /* linearize (a | b) | c into a | (b | c) */
        if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
            xmlExpNodePtr tmp = left;
            left = right;
            right = tmp;
        }
        /* OR reduction rule 2 */
        /* a | (a | b) and b | (a | b) are reduced to a | b */
        if (right->type == XML_EXP_OR) {
            if ((left == right->exp_left) ||
                (left == right->exp_right)) {
                xmlExpFree(ctxt, left);
                return(right);
            }
        }
        /* OR canonicalization rule 2 */
        /* linearize (a | b) | c into a | (b | c) */
        if (left->type == XML_EXP_OR) {
            xmlExpNodePtr tmp;

            /* OR canonicalization rule 2 */
            if ((left->exp_right->type != XML_EXP_OR) &&
                (left->exp_right->key < left->exp_left->key)) {
                tmp = left->exp_right;
                left->exp_right = left->exp_left;
                left->exp_left = tmp;
            }
            left->exp_right->ref++;
            tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
                                     NULL, 0, 0);
            /* a failed sub-expression must not be used as an operand */
            if (tmp == NULL) {
                xmlExpFree(ctxt, left);
                return(NULL);
            }
            left->exp_left->ref++;
            tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
                                     NULL, 0, 0);

            xmlExpFree(ctxt, left);
            return(tmp);
        }
        if (right->type == XML_EXP_OR) {
            /* Ordering in the tree */
            /* C | (A | B) -> A | (B | C) */
            if (left->key > right->exp_right->key) {
                xmlExpNodePtr tmp;

                right->exp_right->ref++;
                tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
                                         left, NULL, 0, 0);
                /* a failed sub-expression must not be used as an operand */
                if (tmp == NULL) {
                    xmlExpFree(ctxt, right);
                    return(NULL);
                }
                right->exp_left->ref++;
                tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
                                         tmp, NULL, 0, 0);
                xmlExpFree(ctxt, right);
                return(tmp);
            }
            /* Ordering in the tree */
            /* B | (A | C) -> A | (B | C) */
            if (left->key > right->exp_left->key) {
                xmlExpNodePtr tmp;

                right->exp_right->ref++;
                tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
                                         right->exp_right, NULL, 0, 0);
                /* a failed sub-expression must not be used as an operand */
                if (tmp == NULL) {
                    xmlExpFree(ctxt, right);
                    return(NULL);
                }
                right->exp_left->ref++;
                tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
                                         tmp, NULL, 0, 0);
                xmlExpFree(ctxt, right);
                return(tmp);
            }
        }
        /* we know both types are != XML_EXP_OR here */
        else if (left->key > right->key) {
            xmlExpNodePtr tmp = left;
            left = right;
            right = tmp;
        }
        kbase = xmlExpHashComputeKey(type, left, right);
    } else if (type == XML_EXP_SEQ) {
        /* Forbid reduction rules */
        if (left->type == XML_EXP_FORBID) {
            xmlExpFree(ctxt, right);
            return(left);
        }
        if (right->type == XML_EXP_FORBID) {
            xmlExpFree(ctxt, left);
            return(right);
        }
        /* Empty reduction rules */
        if (right->type == XML_EXP_EMPTY) {
            return(left);
        }
        if (left->type == XML_EXP_EMPTY) {
            return(right);
        }
        kbase = xmlExpHashComputeKey(type, left, right);
    } else
        return(NULL);

    /* look for an identical node already present in the bucket */
    key = kbase % ctxt->size;
    if (ctxt->table[key] != NULL) {
        for (insert = ctxt->table[key]; insert != NULL;
             insert = insert->next) {
            if ((insert->key == kbase) &&
                (insert->type == type)) {
                if (type == XML_EXP_ATOM) {
                    if (name == insert->exp_str) {
                        insert->ref++;
                        return(insert);
                    }
                } else if (type == XML_EXP_COUNT) {
                    if ((insert->exp_min == min) && (insert->exp_max == max) &&
                        (insert->exp_left == left)) {
                        /* the existing node already references left */
                        insert->ref++;
                        left->ref--;
                        return(insert);
                    }
                } else if ((insert->exp_left == left) &&
                           (insert->exp_right == right)) {
                    /* the existing node already references both operands */
                    insert->ref++;
                    left->ref--;
                    right->ref--;
                    return(insert);
                }
            }
        }
    }

    entry = xmlExpNewNode(ctxt, type);
    if (entry == NULL) {
        /* the operands are consumed even when the node can't be built */
        if (type != XML_EXP_ATOM) {
            xmlExpFree(ctxt, left);
            if (type != XML_EXP_COUNT)
                xmlExpFree(ctxt, right);
        }
        return(NULL);
    }
    entry->key = kbase;
    if (type == XML_EXP_ATOM) {
        entry->exp_str = name;
        entry->c_max = 1;
    } else if (type == XML_EXP_COUNT) {
        entry->exp_min = min;
        entry->exp_max = max;
        entry->exp_left = left;
        if ((min == 0) || (IS_NILLABLE(left)))
            entry->info |= XML_EXP_NILABLE;
        if (max < 0)
            entry->c_max = -1;
        else
            entry->c_max = max * entry->exp_left->c_max;
    } else {
        entry->exp_left = left;
        entry->exp_right = right;
        if (type == XML_EXP_OR) {
            if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
                entry->info |= XML_EXP_NILABLE;
            if ((entry->exp_left->c_max == -1) ||
                (entry->exp_right->c_max == -1))
                entry->c_max = -1;
            else if (entry->exp_left->c_max > entry->exp_right->c_max)
                entry->c_max = entry->exp_left->c_max;
            else
                entry->c_max = entry->exp_right->c_max;
        } else {
            if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
                entry->info |= XML_EXP_NILABLE;
            if ((entry->exp_left->c_max == -1) ||
                (entry->exp_right->c_max == -1))
                entry->c_max = -1;
            else
                entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
        }
    }
    entry->ref = 1;

    /* insert the new node at the head of its bucket */
    if (ctxt->table[key] != NULL)
        entry->next = ctxt->table[key];

    ctxt->table[key] = entry;
    ctxt->nbElems++;

    return(entry);
}
/**
 * xmlExpFree:
 * @ctxt: the expression context
 * @exp: the expression
 *
 * Dereference the expression. When the last reference is dropped the
 * node is removed from the hash table, its operands are dereferenced
 * in turn and the node is freed. The shared emptyExp and forbiddenExp
 * nodes are never touched.
 */
void
xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
    xmlExpNodePtr *link;

    if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
        return;

    exp->ref--;
    if (exp->ref != 0)
        return;

    /* Unlink it first from the hash table */
    for (link = &ctxt->table[exp->key % ctxt->size]; *link != NULL;
         link = &(*link)->next) {
        if (*link == exp) {
            *link = exp->next;
            break;
        }
    }

    /* then release the references held on the operands */
    switch (exp->type) {
        case XML_EXP_SEQ:
        case XML_EXP_OR:
            xmlExpFree(ctxt, exp->exp_left);
            xmlExpFree(ctxt, exp->exp_right);
            break;
        case XML_EXP_COUNT:
            xmlExpFree(ctxt, exp->exp_left);
            break;
        default:
            break;
    }

    xmlFree(exp);
    ctxt->nb_nodes--;
}
/**
 * xmlExpRef:
 * @exp: the expression
 *
 * Increase the reference count of the expression, NULL is ignored
 */
void
xmlExpRef(xmlExpNodePtr exp) {
    if (exp == NULL)
        return;
    exp->ref++;
}
2005-08-23 17:41:17 +04:00
/**
 * xmlExpNewAtom:
 * @ctxt: the expression context
 * @name: the atom name
 * @len: the atom name length in byte (or -1);
 *
 * Get the atom associated to this name from that context. The name is
 * first looked up in the context dictionary.
 *
 * Returns the node or NULL in case of error
 */
xmlExpNodePtr
xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
    const xmlChar *interned;

    if ((ctxt == NULL) || (name == NULL))
        return(NULL);

    interned = xmlDictLookup(ctxt->dict, name, len);
    if (interned == NULL)
        return(NULL);

    return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, interned, 0, 0));
}
/**
 * xmlExpNewOr:
 * @ctxt: the expression context
 * @left: left expression
 * @right: right expression
 *
 * Get the atom associated to the choice @left | @right
 * Note that @left and @right are consumed in the operation, to keep
 * a handle on them use xmlExpRef() and use xmlExpFree() to release them,
 * this is true even in case of failure (unless ctxt == NULL).
 *
 * Returns the node or NULL in case of error
 */
xmlExpNodePtr
xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
    if (ctxt == NULL)
        return(NULL);

    if ((left != NULL) && (right != NULL))
        return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));

    /* one operand is missing, release the other one */
    xmlExpFree(ctxt, left);
    xmlExpFree(ctxt, right);
    return(NULL);
}
/**
 * xmlExpNewSeq:
 * @ctxt: the expression context
 * @left: left expression
 * @right: right expression
 *
 * Get the atom associated to the sequence @left , @right
 * Note that @left and @right are consumed in the operation, to keep
 * a handle on them use xmlExpRef() and use xmlExpFree() to release them,
 * this is true even in case of failure (unless ctxt == NULL).
 *
 * Returns the node or NULL in case of error
 */
xmlExpNodePtr
xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
    if (ctxt == NULL)
        return(NULL);

    if ((left != NULL) && (right != NULL))
        return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));

    /* one operand is missing, release the other one */
    xmlExpFree(ctxt, left);
    xmlExpFree(ctxt, right);
    return(NULL);
}
/**
 * xmlExpNewRange:
 * @ctxt: the expression context
 * @subset: the expression to be repeated
 * @min: the lower bound for the repetition
 * @max: the upper bound for the repetition, -1 means infinite
 *
 * Get the atom associated to the range (@subset){@min, @max}
 * Note that @subset is consumed in the operation, to keep
 * a handle on it use xmlExpRef() and use xmlExpFree() to release it,
 * this is true even in case of failure (unless ctxt == NULL).
 *
 * Returns the node or NULL in case of error
 */
xmlExpNodePtr
xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
    int invalid;

    if (ctxt == NULL)
        return(NULL);

    /* the bounds must satisfy 0 <= min, and min <= max unless unbounded */
    invalid = (subset == NULL) || (min < 0) || (max < -1) ||
              ((max >= 0) && (min > max));
    if (invalid) {
        xmlExpFree(ctxt, subset);
        return(NULL);
    }
    return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
                              NULL, NULL, min, max));
}
2005-08-22 04:20:58 +04:00
/************************************************************************
* *
* Public API for operations on expressions *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static int
2012-09-11 09:26:36 +04:00
xmlExpGetLanguageInt ( xmlExpCtxtPtr ctxt , xmlExpNodePtr exp ,
2005-08-22 04:20:58 +04:00
const xmlChar * * list , int len , int nb ) {
int tmp , tmp2 ;
tail :
switch ( exp - > type ) {
case XML_EXP_EMPTY :
return ( 0 ) ;
case XML_EXP_ATOM :
for ( tmp = 0 ; tmp < nb ; tmp + + )
if ( list [ tmp ] = = exp - > exp_str )
return ( 0 ) ;
if ( nb > = len )
return ( - 2 ) ;
2009-09-05 16:52:55 +04:00
list [ nb ] = exp - > exp_str ;
2005-08-22 04:20:58 +04:00
return ( 1 ) ;
case XML_EXP_COUNT :
exp = exp - > exp_left ;
goto tail ;
case XML_EXP_SEQ :
case XML_EXP_OR :
tmp = xmlExpGetLanguageInt ( ctxt , exp - > exp_left , list , len , nb ) ;
if ( tmp < 0 )
return ( tmp ) ;
tmp2 = xmlExpGetLanguageInt ( ctxt , exp - > exp_right , list , len ,
nb + tmp ) ;
if ( tmp2 < 0 )
return ( tmp2 ) ;
return ( tmp + tmp2 ) ;
}
return ( - 1 ) ;
}
/**
* xmlExpGetLanguage :
* @ ctxt : the expression context
* @ exp : the expression
2005-10-27 15:56:20 +04:00
* @ langList : where to store the tokens
2012-09-28 10:59:33 +04:00
* @ len : the allocated length of @ list
2005-08-22 04:20:58 +04:00
*
* Find all the strings used in @ exp and store them in @ list
*
* Returns the number of unique strings found , - 1 in case of errors and
* - 2 if there is more than @ len strings
*/
int
2012-09-11 09:26:36 +04:00
xmlExpGetLanguage ( xmlExpCtxtPtr ctxt , xmlExpNodePtr exp ,
2005-10-27 15:56:20 +04:00
const xmlChar * * langList , int len ) {
if ( ( ctxt = = NULL ) | | ( exp = = NULL ) | | ( langList = = NULL ) | | ( len < = 0 ) )
2005-08-22 04:20:58 +04:00
return ( - 1 ) ;
2005-10-27 15:56:20 +04:00
return ( xmlExpGetLanguageInt ( ctxt , exp , langList , len , 0 ) ) ;
2005-08-22 04:20:58 +04:00
}
static int
2012-09-11 09:26:36 +04:00
xmlExpGetStartInt ( xmlExpCtxtPtr ctxt , xmlExpNodePtr exp ,
2005-08-22 04:20:58 +04:00
const xmlChar * * list , int len , int nb ) {
int tmp , tmp2 ;
tail :
switch ( exp - > type ) {
case XML_EXP_FORBID :
return ( 0 ) ;
case XML_EXP_EMPTY :
return ( 0 ) ;
case XML_EXP_ATOM :
for ( tmp = 0 ; tmp < nb ; tmp + + )
if ( list [ tmp ] = = exp - > exp_str )
return ( 0 ) ;
if ( nb > = len )
return ( - 2 ) ;
2009-09-05 16:52:55 +04:00
list [ nb ] = exp - > exp_str ;
2005-08-22 04:20:58 +04:00
return ( 1 ) ;
case XML_EXP_COUNT :
exp = exp - > exp_left ;
goto tail ;
case XML_EXP_SEQ :
tmp = xmlExpGetStartInt ( ctxt , exp - > exp_left , list , len , nb ) ;
if ( tmp < 0 )
return ( tmp ) ;
if ( IS_NILLABLE ( exp - > exp_left ) ) {
tmp2 = xmlExpGetStartInt ( ctxt , exp - > exp_right , list , len ,
nb + tmp ) ;
if ( tmp2 < 0 )
return ( tmp2 ) ;
tmp + = tmp2 ;
}
return ( tmp ) ;
case XML_EXP_OR :
tmp = xmlExpGetStartInt ( ctxt , exp - > exp_left , list , len , nb ) ;
if ( tmp < 0 )
return ( tmp ) ;
tmp2 = xmlExpGetStartInt ( ctxt , exp - > exp_right , list , len ,
nb + tmp ) ;
if ( tmp2 < 0 )
return ( tmp2 ) ;
return ( tmp + tmp2 ) ;
}
return ( - 1 ) ;
}
/**
* xmlExpGetStart :
* @ ctxt : the expression context
* @ exp : the expression
2005-10-27 15:56:20 +04:00
* @ tokList : where to store the tokens
2012-09-28 10:59:33 +04:00
* @ len : the allocated length of @ list
2005-08-22 04:20:58 +04:00
*
* Find all the strings that appears at the start of the languages
* accepted by @ exp and store them in @ list . E . g . for ( a , b ) | c
* it will return the list [ a , c ]
*
* Returns the number of unique strings found , - 1 in case of errors and
* - 2 if there is more than @ len strings
*/
int
2012-09-11 09:26:36 +04:00
xmlExpGetStart ( xmlExpCtxtPtr ctxt , xmlExpNodePtr exp ,
2005-10-27 15:56:20 +04:00
const xmlChar * * tokList , int len ) {
if ( ( ctxt = = NULL ) | | ( exp = = NULL ) | | ( tokList = = NULL ) | | ( len < = 0 ) )
2005-08-22 04:20:58 +04:00
return ( - 1 ) ;
2005-10-27 15:56:20 +04:00
return ( xmlExpGetStartInt ( ctxt , exp , tokList , len , 0 ) ) ;
2005-08-22 04:20:58 +04:00
}
/**
 * xmlExpIsNillable:
 * @exp: the expression
 *
 * Tell whether the expression is nillable, i.e. whether it accepts the
 * empty sequence
 *
 * Returns 1 if nillable, 0 if not and -1 in case of error
 */
int
xmlExpIsNillable(xmlExpNodePtr exp) {
    if (exp == NULL)
        return(-1);
    /* normalise the macro result to exactly 0 or 1 */
    if (IS_NILLABLE(exp))
        return(1);
    return(0);
}
/*
 * xmlExpStringDeriveInt:
 * @ctxt: the expression context
 * @exp: the expression to derive
 * @str: the input token; atoms are matched by pointer comparison, so the
 *       caller is expected to pass the dictionary copy of the string
 *
 * Recursive worker for xmlExpStringDerive(): computes what remains of
 * @exp once @str has been consumed. @exp itself is not consumed.
 *
 * Returns the resulting expression (possibly forbiddenExp or emptyExp)
 *         or NULL in case of internal error
 */
static xmlExpNodePtr
xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
{
    xmlExpNodePtr ret;

    switch (exp->type) {
        case XML_EXP_EMPTY:
            return(forbiddenExp);
        case XML_EXP_FORBID:
            return(forbiddenExp);
        case XML_EXP_ATOM:
            if (exp->exp_str == str) {
                ret = emptyExp;
            } else {
                /* TODO wildcards here */
                ret = forbiddenExp;
            }
            return(ret);
        case XML_EXP_OR: {
            xmlExpNodePtr tmp;

            tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
            if (tmp == NULL) {
                return(NULL);
            }
            ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
            if (ret == NULL) {
                xmlExpFree(ctxt, tmp);
                return(NULL);
            }
            ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
                                     NULL, 0, 0);
            return(ret);
        }
        case XML_EXP_SEQ:
            ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
            if (ret == NULL) {
                return(NULL);
            } else if (ret == forbiddenExp) {
                /* the left part may be skipped: try the right part */
                if (IS_NILLABLE(exp->exp_left)) {
                    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
                }
            } else {
                /* the new sequence node takes its own reference */
                exp->exp_right->ref++;
                ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
                                         NULL, 0, 0);
            }
            return(ret);
        case XML_EXP_COUNT: {
            int min, max;
            xmlExpNodePtr tmp;

            if (exp->exp_max == 0)
                return(forbiddenExp);
            ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
            if (ret == NULL)
                return(NULL);
            if (ret == forbiddenExp) {
                return(ret);
            }
            if (exp->exp_max == 1)
                return(ret);
            /* one iteration has been started: lower both bounds by one */
            if (exp->exp_max < 0) /* unbounded */
                max = -1;
            else
                max = exp->exp_max - 1;
            if (exp->exp_min > 0)
                min = exp->exp_min - 1;
            else
                min = 0;
            exp->exp_left->ref++;
            tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
                                     NULL, min, max);
            if (tmp == NULL) {
                /*
                 * Do not leak the partial derivative and do not hand a
                 * NULL operand to the sequence constructor below.
                 */
                xmlExpFree(ctxt, ret);
                return(NULL);
            }
            if (ret == emptyExp) {
                return(tmp);
            }
            return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
                                      NULL, 0, 0));
        }
    }
    return(NULL);
}
/**
 * xmlExpStringDerive:
 * @ctxt: the expression context
 * @exp: the expression
 * @str: the string
 * @len: the string len in bytes if available
 *
 * Do one step of Brzozowski derivation of the expression @exp with
 * respect to the input string
 *
 * Returns the resulting expression or NULL in case of internal error
 */
xmlExpNodePtr
xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                   const xmlChar *str, int len) {
    const xmlChar *interned;

    if ((ctxt == NULL) || (exp == NULL) || (str == NULL))
        return(NULL);

    /*
     * Only strings already present in the context dictionary can be
     * accepted, and the derivation needs the dictionary's own copy.
     */
    interned = xmlDictExists(ctxt->dict, str, len);
    if (interned != NULL)
        return(xmlExpStringDeriveInt(ctxt, exp, interned));

    return(forbiddenExp);
}
/*
 * xmlExpCheckCard:
 * @exp: the englobing expression
 * @sub: the subexpression
 *
 * Compare the c_max fields of the two expressions; a value of -1 is
 * handled as "no upper limit". Nillability is not compared here.
 *
 * Returns 0 if @sub's c_max exceeds what @exp's c_max allows, 1 otherwise
 */
static int
xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
    /* an unlimited sub only fits into an unlimited exp */
    if (sub->c_max == -1)
        return(exp->c_max == -1);

    /* both limited: exp must allow at least as much as sub */
    if ((exp->c_max >= 0) && (exp->c_max < sub->c_max))
        return(0);

    return(1);
}
/*
 * Forward declaration: xmlExpDivide() calls xmlExpExpDeriveInt() and
 * xmlExpExpDeriveInt() calls xmlExpDivide(), so one of the two has to be
 * declared before it is defined.
 */
static xmlExpNodePtr xmlExpExpDeriveInt ( xmlExpCtxtPtr ctxt , xmlExpNodePtr exp ,
xmlExpNodePtr sub ) ;
/**
 * xmlExpDivide:
 * @ctxt: the expressions context
 * @exp: the englobing expression
 * @sub: the subexpression
 * @mult: if not NULL, receives the repetition node sub{n} that succeeded
 * @remain: if not NULL, receives what is left of sub{n} once @exp has
 *          been derived from it
 *
 * Check if exp is a multiple of sub, i.e. if there is a finite number n
 * so that sub{n} subsume exp. Candidates n = 1 .. exp->c_max are tried
 * in increasing order and the first one that works is returned.
 * Both output slots are reset to NULL on entry.
 *
 * Returns the multiple value if successful, 0 if it is not a multiple
 *         and -1 in case of internal error.
 */
static int
xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
             xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
    xmlExpNodePtr repeated, rest;
    int n;

    if (mult != NULL)
        *mult = NULL;
    if (remain != NULL)
        *remain = NULL;

    /* an expression without upper limit cannot be a finite multiple */
    if (exp->c_max == -1)
        return(0);
    if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub)))
        return(0);

    for (n = 1; n <= exp->c_max; n++) {
        /* the repetition node takes its own reference on sub */
        sub->ref++;
        repeated = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
                                      sub, NULL, NULL, n, n);
        if (repeated == NULL)
            return(-1);

        rest = NULL;
        if (xmlExpCheckCard(repeated, exp)) {
            rest = xmlExpExpDeriveInt(ctxt, repeated, exp);
            if (rest == NULL) {
                xmlExpFree(ctxt, repeated);
                return(-1);
            }
            if ((rest != forbiddenExp) && (IS_NILLABLE(rest))) {
                /* success: hand over or drop each of the two results */
                if (remain == NULL)
                    xmlExpFree(ctxt, rest);
                else
                    *remain = rest;
                if (mult == NULL)
                    xmlExpFree(ctxt, repeated);
                else
                    *mult = repeated;
                return(n);
            }
        }
        /* this candidate did not work: release it and try the next one */
        xmlExpFree(ctxt, repeated);
        xmlExpFree(ctxt, rest);
    }
    return(0);
}
/**
 * xmlExpExpDeriveInt:
 * @ctxt: the expressions context
 * @exp: the englobing expression
 * @sub: the subexpression
 *
 * Try to do a step of Brzozowski derivation but at a higher level
 * the input being a subexpression.
 *
 * Neither @exp nor @sub is consumed. @sub is first decomposed when it is
 * a sequence or a choice, then the structure of @exp is used; when that
 * makes no progress, the start tokens of @sub are collected and both
 * expressions are derived token by token.
 *
 * Returns the resulting expression (possibly forbiddenExp or emptyExp)
 *         or NULL in case of internal error
 */
static xmlExpNodePtr
xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
    xmlExpNodePtr ret, tmp, tmp2, tmp3;
    const xmlChar **tab;
    int len, i;

    /*
     * In case of equality and if the expression can only consume a finite
     * amount, then the derivation is empty
     */
    if ((exp == sub) && (exp->c_max >= 0)) {
        return(emptyExp);
    }
    /*
     * decompose sub sequence first
     */
    if (sub->type == XML_EXP_EMPTY) {
        /* nothing to consume: the result is exp, with a new reference */
        exp->ref++;
        return(exp);
    }
    if (sub->type == XML_EXP_SEQ) {
        tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
        if (tmp == NULL)
            return(NULL);
        if (tmp == forbiddenExp)
            return(tmp);
        ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
        xmlExpFree(ctxt, tmp);
        return(ret);
    }
    if (sub->type == XML_EXP_OR) {
        tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
        if (tmp == forbiddenExp)
            return(tmp);
        if (tmp == NULL)
            return(NULL);
        ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
        if ((ret == NULL) || (ret == forbiddenExp)) {
            xmlExpFree(ctxt, tmp);
            return(ret);
        }
        return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
    }
    if (!xmlExpCheckCard(exp, sub)) {
        return(forbiddenExp);
    }
    switch (exp->type) {
        case XML_EXP_EMPTY:
            if (sub == emptyExp)
                return(emptyExp);
            return(forbiddenExp);
        case XML_EXP_FORBID:
            return(forbiddenExp);
        case XML_EXP_ATOM:
            if (sub->type == XML_EXP_ATOM) {
                /* TODO: handle wildcards */
                if (exp->exp_str == sub->exp_str) {
                    return(emptyExp);
                }
                return(forbiddenExp);
            }
            if ((sub->type == XML_EXP_COUNT) &&
                (sub->exp_max == 1) &&
                (sub->exp_left->type == XML_EXP_ATOM)) {
                /* TODO: handle wildcards */
                if (exp->exp_str == sub->exp_left->exp_str) {
                    return(emptyExp);
                }
                return(forbiddenExp);
            }
            return(forbiddenExp);
        case XML_EXP_SEQ:
            /* try to get the sequence consumed only if possible */
            if (xmlExpCheckCard(exp->exp_left, sub)) {
                /* See if the sequence can be consumed directly */
                ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
                if ((ret != forbiddenExp) && (ret != NULL)) {
                    /*
                     * TODO: assumption here that we are determinist
                     *       i.e. we won't get to a nillable exp left
                     *       subset which could be matched by the right
                     *       part too.
                     * e.g.: (a | b)+,(a | c) and 'a+,a'
                     */
                    exp->exp_right->ref++;
                    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
                                              exp->exp_right, NULL, 0, 0));
                }
            }
            /* Try instead to decompose */
            if (sub->type == XML_EXP_COUNT) {
                int min, max;

                ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
                if (ret == NULL)
                    return(NULL);
                if (ret != forbiddenExp) {
                    if (sub->exp_max < 0)
                        max = -1;
                    else
                        max = sub->exp_max - 1;
                    if (sub->exp_min > 0)
                        min = sub->exp_min - 1;
                    else
                        min = 0;
                    exp->exp_right->ref++;
                    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
                                             exp->exp_right, NULL, 0, 0);
                    if (tmp == NULL)
                        return(NULL);

                    sub->exp_left->ref++;
                    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
                                      sub->exp_left, NULL, NULL, min, max);
                    if (tmp2 == NULL) {
                        xmlExpFree(ctxt, tmp);
                        return(NULL);
                    }
                    ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
                    xmlExpFree(ctxt, tmp);
                    xmlExpFree(ctxt, tmp2);
                    return(ret);
                }
            }
            /* we made no progress on structured operations */
            break;
        case XML_EXP_OR:
            ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
            if (ret == NULL)
                return(NULL);
            tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
            if (tmp == NULL) {
                xmlExpFree(ctxt, ret);
                return(NULL);
            }
            return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
        case XML_EXP_COUNT: {
            int min, max;

            if (sub->type == XML_EXP_COUNT) {
                /*
                 * Try to see if the loop is completely subsumed
                 */
                tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
                if (tmp == NULL)
                    return(NULL);
                if (tmp == forbiddenExp) {
                    int mult;

                    /* on success tmp is replaced by the division remainder */
                    mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
                                        NULL, &tmp);
                    if (mult <= 0) {
                        return(forbiddenExp);
                    }
                    if (sub->exp_max == -1) {
                        max = -1;
                        if (exp->exp_max == -1) {
                            if (exp->exp_min <= sub->exp_min * mult)
                                min = 0;
                            else
                                min = exp->exp_min - sub->exp_min * mult;
                        } else {
                            xmlExpFree(ctxt, tmp);
                            return(forbiddenExp);
                        }
                    } else {
                        if (exp->exp_max == -1) {
                            if (exp->exp_min > sub->exp_min * mult) {
                                max = -1;
                                min = exp->exp_min - sub->exp_min * mult;
                            } else {
                                max = -1;
                                min = 0;
                            }
                        } else {
                            if (exp->exp_max < sub->exp_max * mult) {
                                xmlExpFree(ctxt, tmp);
                                return(forbiddenExp);
                            }
                            if (sub->exp_max * mult > exp->exp_min)
                                min = 0;
                            else
                                min = exp->exp_min - sub->exp_max * mult;
                            max = exp->exp_max - sub->exp_max * mult;
                        }
                    }
                } else if (!IS_NILLABLE(tmp)) {
                    /*
                     * TODO: loop here to try to grow if working on finite
                     *       blocks.
                     */
                    xmlExpFree(ctxt, tmp);
                    return(forbiddenExp);
                } else if (sub->exp_max == -1) {
                    if (exp->exp_max == -1) {
                        if (exp->exp_min <= sub->exp_min) {
                            max = -1;
                            min = 0;
                        } else {
                            max = -1;
                            min = exp->exp_min - sub->exp_min;
                        }
                    } else if (exp->exp_min > sub->exp_min) {
                        xmlExpFree(ctxt, tmp);
                        return(forbiddenExp);
                    } else {
                        max = -1;
                        min = 0;
                    }
                } else {
                    if (exp->exp_max == -1) {
                        if (exp->exp_min > sub->exp_min) {
                            max = -1;
                            min = exp->exp_min - sub->exp_min;
                        } else {
                            max = -1;
                            min = 0;
                        }
                    } else {
                        if (exp->exp_max < sub->exp_max) {
                            xmlExpFree(ctxt, tmp);
                            return(forbiddenExp);
                        }
                        if (sub->exp_max > exp->exp_min)
                            min = 0;
                        else
                            min = exp->exp_min - sub->exp_max;
                        max = exp->exp_max - sub->exp_max;
                    }
                }
                exp->exp_left->ref++;
                tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
                                          NULL, NULL, min, max);
                if (tmp2 == NULL) {
                    /* tmp is still owned here: do not leak it */
                    xmlExpFree(ctxt, tmp);
                    return(NULL);
                }
                ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
                                         NULL, 0, 0);
                return(ret);
            }
            tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
            if (tmp == NULL)
                return(NULL);
            if (tmp == forbiddenExp) {
                return(forbiddenExp);
            }
            if (exp->exp_min > 0)
                min = exp->exp_min - 1;
            else
                min = 0;
            if (exp->exp_max < 0)
                max = -1;
            else
                max = exp->exp_max - 1;

            exp->exp_left->ref++;
            tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
                                      NULL, NULL, min, max);
            if (tmp2 == NULL) {
                /* tmp is still owned here: do not leak it */
                xmlExpFree(ctxt, tmp);
                return(NULL);
            }
            ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
                                     NULL, 0, 0);
            return(ret);
        }
    }

    if (IS_NILLABLE(sub)) {
        if (!(IS_NILLABLE(exp)))
            return(forbiddenExp);
        else
            ret = emptyExp;
    } else
        ret = NULL;
    /*
     * here the structured derivation made no progress so
     * we use the default token based derivation to force one more step
     */
    if (ctxt->tabSize == 0)
        ctxt->tabSize = 40;

    tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
                                       sizeof(const xmlChar *));
    if (tab == NULL) {
        return(NULL);
    }

    /*
     * collect all the strings accepted by the subexpression on input
     */
    len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
    /* -2 is the only result that a bigger table can cure */
    while (len == -2) {
        const xmlChar **temp;
        int newSize;

        /* refuse to double when the count or the byte size would overflow */
        if ((ctxt->tabSize > INT_MAX / 2) ||
            ((size_t) ctxt->tabSize >
             SIZE_MAX / (2 * sizeof(const xmlChar *)))) {
            xmlFree((xmlChar **) tab);
            return(NULL);
        }
        newSize = ctxt->tabSize * 2;
        temp = (const xmlChar **) xmlRealloc((xmlChar **) tab,
                            (size_t) newSize * sizeof(const xmlChar *));
        if (temp == NULL) {
            xmlFree((xmlChar **) tab);
            return(NULL);
        }
        tab = temp;
        ctxt->tabSize = newSize;
        len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
    }
    if (len < 0) {
        /* any other negative value is an error, not a size problem */
        xmlFree((xmlChar **) tab);
        return(NULL);
    }
    for (i = 0; i < len; i++) {
        tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
        if ((tmp == NULL) || (tmp == forbiddenExp)) {
            xmlExpFree(ctxt, ret);
            xmlFree((xmlChar **) tab);
            return(tmp);
        }
        tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
        if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
            xmlExpFree(ctxt, tmp);
            xmlExpFree(ctxt, ret);
            xmlFree((xmlChar **) tab);
            /* report the failing derivation, not the node just released */
            return(tmp2);
        }
        tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
        xmlExpFree(ctxt, tmp);
        xmlExpFree(ctxt, tmp2);

        if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
            xmlExpFree(ctxt, ret);
            xmlFree((xmlChar **) tab);
            return(tmp3);
        }

        /* accumulate the per-token results as a choice */
        if (ret == NULL)
            ret = tmp3;
        else {
            ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
            if (ret == NULL) {
                xmlFree((xmlChar **) tab);
                return(NULL);
            }
        }
    }
    xmlFree((xmlChar **) tab);
    return(ret);
}
2012-09-11 09:26:36 +04:00
2005-08-22 18:43:43 +04:00
/**
 * xmlExpExpDerive:
 * @ctxt: the expressions context
 * @exp: the englobing expression
 * @sub: the subexpression
 *
 * Evaluates the expression resulting from @exp consuming a sub expression @sub
 * Based on algebraic derivation and sometimes direct Brzozowski derivation
 * it usually takes less than linear time and can handle expressions generating
 * infinite languages.
 *
 * Returns the resulting expression or NULL in case of internal error, the
 *         result must be freed
 */
xmlExpNodePtr
xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
    if ((ctxt == NULL) || (exp == NULL) || (sub == NULL))
        return(NULL);

    /*
     * Constant-time rejections before the real derivation: @sub accepts
     * the empty input while @exp does not, or the cardinalities cannot fit.
     */
    if ((!IS_NILLABLE(exp)) && IS_NILLABLE(sub))
        return(forbiddenExp);
    if (!xmlExpCheckCard(exp, sub))
        return(forbiddenExp);

    return(xmlExpExpDeriveInt(ctxt, exp, sub));
}
2005-08-22 04:20:58 +04:00
/**
 * xmlExpSubsume:
 * @ctxt: the expressions context
 * @exp: the englobing expression
 * @sub: the subexpression
 *
 * Check whether @exp accepts all the languages accepted by @sub.
 *
 * Returns 1 if true 0 if false and -1 in case of failure.
 */
int
xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
    xmlExpNodePtr rest;
    int answer;

    if ((ctxt == NULL) || (exp == NULL) || (sub == NULL))
        return(-1);

    /*
     * TODO: speedup by checking the language of sub is a subset of the
     *       language of exp
     */

    /* constant-time rejections */
    if ((!IS_NILLABLE(exp)) && IS_NILLABLE(sub))
        return(0);
    if (!xmlExpCheckCard(exp, sub))
        return(0);

    rest = xmlExpExpDeriveInt(ctxt, exp, sub);
    if (rest == NULL)
        return(-1);
    if (rest == forbiddenExp)
        return(0);
    if (rest == emptyExp)
        return(1);

    /* @sub is subsumed when what is left of @exp may match nothing */
    answer = IS_NILLABLE(rest) ? 1 : 0;
    xmlExpFree(ctxt, rest);
    return(answer);
}
2005-08-22 16:07:04 +04:00
/************************************************************************
* *
2012-09-11 09:26:36 +04:00
* Parsing expression *
2005-08-22 16:07:04 +04:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Forward declaration: xmlExpParseOr() calls xmlExpParseExpr() for a
 * parenthesised sub-expression before xmlExpParseExpr() is defined.
 */
static xmlExpNodePtr xmlExpParseExpr ( xmlExpCtxtPtr ctxt ) ;
/*
 * Cursor helpers for the expression parser below. They replace the
 * definitions of the same names made earlier in the file and expect a
 * variable named ctxt with a cur member to be in scope.
 */
/* CUR: the character under the cursor */
# undef CUR
# define CUR (*ctxt->cur)
/*
 * NEXT: advance the cursor by one character. The expansion carries its
 * own terminating ';' and the parser uses it both with and without a
 * following ';', so the trailing ';' must stay.
 */
# undef NEXT
# define NEXT ctxt->cur++;
/* IS_BLANK: true for space, newline, carriage return and tab */
# undef IS_BLANK
# define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
/*
 * SKIP_BLANKS: advance the cursor over blanks. Like NEXT, the expansion
 * ends with ';' and is written as a bare statement at its call sites.
 */
# define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
/*
 * xmlExpParseNumber:
 * @ctxt: the expressions context, its cursor is advanced
 *
 * Parse a repetition bound: leading blanks are skipped, then either a
 * '*' or a run of decimal digits is consumed.
 *
 * A run of digits whose value does not fit in an int is consumed in
 * full and reported as -1 instead of overflowing the accumulator.
 *
 * Returns the value, or -1 for '*', for a missing number and for a
 *         number too large for an int
 */
static int
xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
    int ret = 0;
    int overflow = 0;

    SKIP_BLANKS
    if (CUR == '*') {
        NEXT
        return(-1);
    }
    if ((CUR < '0') || (CUR > '9'))
        return(-1);
    while ((CUR >= '0') && (CUR <= '9')) {
        int digit = CUR - '0';

        /* ret * 10 + digit would exceed INT_MAX: remember and keep scanning */
        if (ret > (INT_MAX - digit) / 10)
            overflow = 1;
        else
            ret = ret * 10 + digit;
        NEXT
    }
    if (overflow)
        return(-1);
    return(ret);
}
/*
 * xmlExpParseOr:
 * @ctxt: the expressions context, its cursor is advanced
 *
 * Parse one operand: either a parenthesised sub-expression or a string
 * terminal, followed by an optional quantifier ({min}, {min,max}, '?',
 * '+' or '*'). Despite its name this routine does not handle '|'.
 *
 * A failed sub-expression is reported as NULL instead of being passed
 * on to the quantifier handling, and a range whose finite upper bound is
 * below its lower bound is rejected, as xmlExpNewRange() does.
 *
 * Returns the expression or NULL in case of failure
 */
static xmlExpNodePtr
xmlExpParseOr(xmlExpCtxtPtr ctxt) {
    const char *base;
    xmlExpNodePtr ret;
    const xmlChar *val;

    SKIP_BLANKS
    base = ctxt->cur;
    if (*ctxt->cur == '(') {
        NEXT
        ret = xmlExpParseExpr(ctxt);
        SKIP_BLANKS
        if (*ctxt->cur != ')') {
            fprintf(stderr, "unbalanced '(' : %s\n", base);
            xmlExpFree(ctxt, ret);
            return(NULL);
        }
        /* the content of the parentheses did not parse */
        if (ret == NULL)
            return(NULL);
        NEXT;
        SKIP_BLANKS
        goto parse_quantifier;
    }
    /* a terminal runs up to the next blank or operator character */
    while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
           (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
           (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
        NEXT;
    val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
    if (val == NULL)
        return(NULL);
    ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
    if (ret == NULL)
        return(NULL);
    SKIP_BLANKS
parse_quantifier:
    if (CUR == '{') {
        int min, max;

        NEXT
        min = xmlExpParseNumber(ctxt);
        if (min < 0) {
            xmlExpFree(ctxt, ret);
            return(NULL);
        }
        SKIP_BLANKS
        if (CUR == ',') {
            NEXT
            max = xmlExpParseNumber(ctxt);
            SKIP_BLANKS
        } else
            max = min;
        if (CUR != '}') {
            xmlExpFree(ctxt, ret);
            return(NULL);
        }
        NEXT
        /* a finite upper bound may not be lower than the lower bound */
        if ((max >= 0) && (min > max)) {
            xmlExpFree(ctxt, ret);
            return(NULL);
        }
        ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
                                 min, max);
        SKIP_BLANKS
    } else if (CUR == '?') {
        NEXT
        ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
                                 0, 1);
        SKIP_BLANKS
    } else if (CUR == '+') {
        NEXT
        ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
                                 1, -1);
        SKIP_BLANKS
    } else if (CUR == '*') {
        NEXT
        ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
                                 0, -1);
        SKIP_BLANKS
    }
    return(ret);
}
/*
 * xmlExpParseSeq:
 * @ctxt: the expressions context, its cursor is advanced
 *
 * Parse one or more operands separated by '|' and combine them into
 * XML_EXP_OR nodes. Despite its name this is the level handling the
 * choice operator; ',' is handled by xmlExpParseExpr().
 *
 * Returns the expression or NULL in case of failure
 */
static xmlExpNodePtr
xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
    xmlExpNodePtr ret, right;

    ret = xmlExpParseOr(ctxt);
    /* never combine a failed first operand with what follows */
    if (ret == NULL)
        return(NULL);
    SKIP_BLANKS
    while (CUR == '|') {
        NEXT
        right = xmlExpParseOr(ctxt);
        if (right == NULL) {
            xmlExpFree(ctxt, ret);
            return(NULL);
        }
        ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
        if (ret == NULL)
            return(NULL);
    }
    return(ret);
}
/*
 * xmlExpParseExpr:
 * @ctxt: the expressions context, its cursor is advanced
 *
 * Parse one or more '|'-level groups separated by ',' and combine them
 * into XML_EXP_SEQ nodes. This is the top level of the grammar and is
 * also used for the content of parentheses.
 *
 * Returns the expression or NULL in case of failure
 */
static xmlExpNodePtr
xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
    xmlExpNodePtr ret, right;

    ret = xmlExpParseSeq(ctxt);
    /* never combine a failed first operand with what follows */
    if (ret == NULL)
        return(NULL);
    SKIP_BLANKS
    while (CUR == ',') {
        NEXT
        right = xmlExpParseSeq(ctxt);
        if (right == NULL) {
            xmlExpFree(ctxt, ret);
            return(NULL);
        }
        ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
        if (ret == NULL)
            return(NULL);
    }
    return(ret);
}
/**
 * xmlExpParse:
 * @ctxt: the expressions context
 * @expr: the 0 terminated string
 *
 * Minimal parser for regexps, it understand the following constructs
 *  - string terminals
 *  - choice operator |
 *  - sequence operator ,
 *  - subexpressions (...)
 *  - usual cardinality operators + * and ?
 *  - finite sequences  { min, max }
 *  - infinite sequences { min, * }
 * There is minimal checkings made especially no checking on strings values
 *
 * The whole string has to be consumed: trailing characters other than
 * blanks make the call fail.
 *
 * Returns a new expression or NULL in case of failure, including when
 *         @ctxt or @expr is NULL
 */
xmlExpNodePtr
xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
    xmlExpNodePtr ret;

    /* same argument checking as the other public entry points */
    if ((ctxt == NULL) || (expr == NULL))
        return(NULL);

    ctxt->expr = expr;
    ctxt->cur = expr;

    ret = xmlExpParseExpr(ctxt);
    SKIP_BLANKS
    if (*ctxt->cur != 0) {
        xmlExpFree(ctxt, ret);
        return(NULL);
    }
    return(ret);
}
/*
 * xmlExpDumpInt:
 * @buf: the buffer receiving the output
 * @expr: the expression to serialize, nothing is written if it is NULL
 * @glob: if non-zero the output is wrapped in a pair of parentheses
 *
 * Recursive worker for xmlExpDump(). Operands of sequence, choice and
 * repetition nodes are parenthesised when they are themselves a
 * sequence or a choice.
 */
static void
xmlExpDumpInt ( xmlBufferPtr buf , xmlExpNodePtr expr , int glob ) {
xmlExpNodePtr c ;
if ( expr = = NULL ) return ;
if ( glob ) xmlBufferWriteChar ( buf , " ( " ) ;
switch ( expr - > type ) {
case XML_EXP_EMPTY :
xmlBufferWriteChar ( buf , " empty " ) ;
break ;
case XML_EXP_FORBID :
xmlBufferWriteChar ( buf , " forbidden " ) ;
break ;
case XML_EXP_ATOM :
/* an atom is written as its string */
xmlBufferWriteCHAR ( buf , expr - > exp_str ) ;
break ;
case XML_EXP_SEQ :
/* left operand, separator, right operand */
c = expr - > exp_left ;
if ( ( c - > type = = XML_EXP_SEQ ) | | ( c - > type = = XML_EXP_OR ) )
xmlExpDumpInt ( buf , c , 1 ) ;
else
xmlExpDumpInt ( buf , c , 0 ) ;
xmlBufferWriteChar ( buf , " , " ) ;
c = expr - > exp_right ;
if ( ( c - > type = = XML_EXP_SEQ ) | | ( c - > type = = XML_EXP_OR ) )
xmlExpDumpInt ( buf , c , 1 ) ;
else
xmlExpDumpInt ( buf , c , 0 ) ;
break ;
case XML_EXP_OR :
/* same layout as a sequence, with the choice separator */
c = expr - > exp_left ;
if ( ( c - > type = = XML_EXP_SEQ ) | | ( c - > type = = XML_EXP_OR ) )
xmlExpDumpInt ( buf , c , 1 ) ;
else
xmlExpDumpInt ( buf , c , 0 ) ;
xmlBufferWriteChar ( buf , " | " ) ;
c = expr - > exp_right ;
if ( ( c - > type = = XML_EXP_SEQ ) | | ( c - > type = = XML_EXP_OR ) )
xmlExpDumpInt ( buf , c , 1 ) ;
else
xmlExpDumpInt ( buf , c , 0 ) ;
break ;
case XML_EXP_COUNT : {
/* scratch buffer for the textual form of the repetition bounds */
char rep [ 40 ] ;
2012-09-11 09:26:36 +04:00
2005-08-22 16:07:04 +04:00
c = expr - > exp_left ;
if ( ( c - > type = = XML_EXP_SEQ ) | | ( c - > type = = XML_EXP_OR ) )
xmlExpDumpInt ( buf , c , 1 ) ;
else
xmlExpDumpInt ( buf , c , 0 ) ;
/* the three common bound pairs get their one-character operator */
if ( ( expr - > exp_min = = 0 ) & & ( expr - > exp_max = = 1 ) ) {
rep [ 0 ] = ' ? ' ;
rep [ 1 ] = 0 ;
} else if ( ( expr - > exp_min = = 0 ) & & ( expr - > exp_max = = - 1 ) ) {
rep [ 0 ] = ' * ' ;
rep [ 1 ] = 0 ;
} else if ( ( expr - > exp_min = = 1 ) & & ( expr - > exp_max = = - 1 ) ) {
rep [ 0 ] = ' + ' ;
rep [ 1 ] = 0 ;
} else if ( expr - > exp_max = = expr - > exp_min ) {
/* exact count */
snprintf ( rep , 39 , " {%d} " , expr - > exp_min ) ;
} else if ( expr - > exp_max < 0 ) {
/* lower bound only, a negative maximum is printed as inf */
snprintf ( rep , 39 , " {%d,inf} " , expr - > exp_min ) ;
} else {
snprintf ( rep , 39 , " {%d,%d} " , expr - > exp_min , expr - > exp_max ) ;
}
/* force termination at the last byte of the scratch buffer */
rep [ 39 ] = 0 ;
xmlBufferWriteChar ( buf , rep ) ;
break ;
}
default :
/* unknown node type: reported on stderr, nothing added to buf */
fprintf ( stderr , " Error in tree \n " ) ;
}
if ( glob )
xmlBufferWriteChar ( buf , " ) " ) ;
}
/**
* xmlExpDump :
* @ buf : a buffer to receive the output
* @ expr : the compiled expression
*
* Serialize the expression as compiled to the buffer
*/
void
2005-08-23 01:22:27 +04:00
xmlExpDump ( xmlBufferPtr buf , xmlExpNodePtr expr ) {
if ( ( buf = = NULL ) | | ( expr = = NULL ) )
2005-08-22 16:07:04 +04:00
return ;
2005-08-23 01:22:27 +04:00
xmlExpDumpInt ( buf , expr , 0 ) ;
2005-08-22 16:07:04 +04:00
}
/**
 * xmlExpMaxToken:
 * @expr: a compiled expression
 *
 * Indicate the maximum number of inputs an expression can accept
 *
 * Returns the maximum length or -1 in case of error
 */
int
xmlExpMaxToken(xmlExpNodePtr expr) {
    /* the value is read straight from the node's c_max field */
    return((expr == NULL) ? -1 : expr->c_max);
}
/**
 * xmlExpCtxtNbNodes:
 * @ctxt: an expression context
 *
 * Debugging facility: provides the number of allocated nodes at that point
 *
 * Returns the number of nodes in use or -1 in case of error
 */
int
xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
    /* the value is read straight from the context's nb_nodes counter */
    return((ctxt == NULL) ? -1 : ctxt->nb_nodes);
}
/**
 * xmlExpCtxtNbCons:
 * @ctxt: an expression context
 *
 * Debugging facility: provides the number of nodes allocated over the
 * lifetime of the context
 *
 * Returns the number of nodes ever allocated or -1 in case of error
 */
int
xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
    /* the value is read straight from the context's nb_cons counter */
    return((ctxt == NULL) ? -1 : ctxt->nb_cons);
}
2024-06-16 01:11:41 +03:00
/** DOC_ENABLE */
2005-08-22 04:20:58 +04:00
# endif /* LIBXML_EXPR_ENABLED */
2022-02-20 20:46:42 +03:00
2002-04-16 19:50:10 +04:00
# endif /* LIBXML_REGEXP_ENABLED */