2007-04-27 22:40:23 +04:00
/*
* Copyright ( C ) 2001 - 2004 Sistina Software , Inc . All rights reserved .
* Copyright ( C ) 2004 - 2007 Red Hat , Inc . All rights reserved .
*
* This file is part of the device - mapper userspace tools .
*
* This copyrighted material is made available to anyone wishing to use ,
* modify , copy , or redistribute it subject to the terms and conditions
2007-08-21 20:26:07 +04:00
* of the GNU Lesser General Public License v .2 .1 .
2007-04-27 22:40:23 +04:00
*
2007-08-21 20:26:07 +04:00
* You should have received a copy of the GNU Lesser General Public License
2007-04-27 22:40:23 +04:00
* along with this program ; if not , write to the Free Software Foundation ,
* Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA
*/
2008-11-03 21:59:59 +03:00
# include "dmlib.h"
2007-04-27 22:40:23 +04:00
# include "parse_rx.h"
# include "ttree.h"
# include "assert.h"
struct dfa_state {
int final ;
struct dfa_state * lookup [ 256 ] ;
} ;
struct state_queue {
struct dfa_state * s ;
dm_bitset_t bits ;
struct state_queue * next ;
} ;
struct dm_regex { /* Instance variables for the lexer */
struct dfa_state * start ;
unsigned num_nodes ;
int nodes_entered ;
struct rx_node * * nodes ;
struct dm_pool * scratch , * mem ;
} ;
# define TARGET_TRANS '\0'
static int _count_nodes ( struct rx_node * rx )
{
int r = 1 ;
if ( rx - > left )
r + = _count_nodes ( rx - > left ) ;
if ( rx - > right )
r + = _count_nodes ( rx - > right ) ;
return r ;
}
static void _fill_table ( struct dm_regex * m , struct rx_node * rx )
{
assert ( ( rx - > type ! = OR ) | | ( rx - > left & & rx - > right ) ) ;
if ( rx - > left )
_fill_table ( m , rx - > left ) ;
if ( rx - > right )
_fill_table ( m , rx - > right ) ;
m - > nodes [ m - > nodes_entered + + ] = rx ;
}
static void _create_bitsets ( struct dm_regex * m )
{
int i ;
for ( i = 0 ; i < m - > num_nodes ; i + + ) {
struct rx_node * n = m - > nodes [ i ] ;
n - > firstpos = dm_bitset_create ( m - > scratch , m - > num_nodes ) ;
n - > lastpos = dm_bitset_create ( m - > scratch , m - > num_nodes ) ;
n - > followpos = dm_bitset_create ( m - > scratch , m - > num_nodes ) ;
}
}
static void _calc_functions ( struct dm_regex * m )
{
int i , j , final = 1 ;
struct rx_node * rx , * c1 , * c2 ;
for ( i = 0 ; i < m - > num_nodes ; i + + ) {
rx = m - > nodes [ i ] ;
c1 = rx - > left ;
c2 = rx - > right ;
if ( dm_bit ( rx - > charset , TARGET_TRANS ) )
rx - > final = final + + ;
switch ( rx - > type ) {
case CAT :
if ( c1 - > nullable )
dm_bit_union ( rx - > firstpos ,
c1 - > firstpos , c2 - > firstpos ) ;
else
dm_bit_copy ( rx - > firstpos , c1 - > firstpos ) ;
if ( c2 - > nullable )
dm_bit_union ( rx - > lastpos ,
c1 - > lastpos , c2 - > lastpos ) ;
else
dm_bit_copy ( rx - > lastpos , c2 - > lastpos ) ;
rx - > nullable = c1 - > nullable & & c2 - > nullable ;
break ;
case PLUS :
dm_bit_copy ( rx - > firstpos , c1 - > firstpos ) ;
dm_bit_copy ( rx - > lastpos , c1 - > lastpos ) ;
rx - > nullable = c1 - > nullable ;
break ;
case OR :
dm_bit_union ( rx - > firstpos , c1 - > firstpos , c2 - > firstpos ) ;
dm_bit_union ( rx - > lastpos , c1 - > lastpos , c2 - > lastpos ) ;
rx - > nullable = c1 - > nullable | | c2 - > nullable ;
break ;
case QUEST :
case STAR :
dm_bit_copy ( rx - > firstpos , c1 - > firstpos ) ;
dm_bit_copy ( rx - > lastpos , c1 - > lastpos ) ;
rx - > nullable = 1 ;
break ;
case CHARSET :
dm_bit_set ( rx - > firstpos , i ) ;
dm_bit_set ( rx - > lastpos , i ) ;
rx - > nullable = 0 ;
break ;
default :
log_error ( " Internal error: Unknown calc node type " ) ;
}
/*
* followpos has it ' s own switch
* because PLUS and STAR do the
* same thing .
*/
switch ( rx - > type ) {
case CAT :
for ( j = 0 ; j < m - > num_nodes ; j + + ) {
if ( dm_bit ( c1 - > lastpos , j ) ) {
struct rx_node * n = m - > nodes [ j ] ;
dm_bit_union ( n - > followpos ,
n - > followpos , c2 - > firstpos ) ;
}
}
break ;
case PLUS :
case STAR :
for ( j = 0 ; j < m - > num_nodes ; j + + ) {
if ( dm_bit ( rx - > lastpos , j ) ) {
struct rx_node * n = m - > nodes [ j ] ;
dm_bit_union ( n - > followpos ,
n - > followpos , rx - > firstpos ) ;
}
}
break ;
}
}
}
static struct dfa_state * _create_dfa_state ( struct dm_pool * mem )
{
return dm_pool_zalloc ( mem , sizeof ( struct dfa_state ) ) ;
}
static struct state_queue * _create_state_queue ( struct dm_pool * mem ,
struct dfa_state * dfa ,
dm_bitset_t bits )
{
struct state_queue * r = dm_pool_alloc ( mem , sizeof ( * r ) ) ;
if ( ! r ) {
stack ;
return NULL ;
}
r - > s = dfa ;
r - > bits = dm_bitset_create ( mem , bits [ 0 ] ) ; /* first element is the size */
dm_bit_copy ( r - > bits , bits ) ;
r - > next = 0 ;
return r ;
}
static int _calc_states ( struct dm_regex * m , struct rx_node * rx )
{
unsigned iwidth = ( m - > num_nodes / DM_BITS_PER_INT ) + 1 ;
struct ttree * tt = ttree_create ( m - > scratch , iwidth ) ;
struct state_queue * h , * t , * tmp ;
struct dfa_state * dfa , * ldfa ;
int i , a , set_bits = 0 , count = 0 ;
dm_bitset_t bs , dfa_bits ;
if ( ! tt )
return_0 ;
if ( ! ( bs = dm_bitset_create ( m - > scratch , m - > num_nodes ) ) )
return_0 ;
/* create first state */
dfa = _create_dfa_state ( m - > mem ) ;
m - > start = dfa ;
ttree_insert ( tt , rx - > firstpos + 1 , dfa ) ;
/* prime the queue */
h = t = _create_state_queue ( m - > scratch , dfa , rx - > firstpos ) ;
while ( h ) {
/* pop state off front of the queue */
dfa = h - > s ;
dfa_bits = h - > bits ;
h = h - > next ;
/* iterate through all the inputs for this state */
dm_bit_clear_all ( bs ) ;
for ( a = 0 ; a < 256 ; a + + ) {
/* iterate through all the states in firstpos */
for ( i = dm_bit_get_first ( dfa_bits ) ;
i > = 0 ; i = dm_bit_get_next ( dfa_bits , i ) ) {
if ( dm_bit ( m - > nodes [ i ] - > charset , a ) ) {
if ( a = = TARGET_TRANS )
dfa - > final = m - > nodes [ i ] - > final ;
dm_bit_union ( bs , bs ,
m - > nodes [ i ] - > followpos ) ;
set_bits = 1 ;
}
}
if ( set_bits ) {
ldfa = ttree_lookup ( tt , bs + 1 ) ;
if ( ! ldfa ) {
/* push */
ldfa = _create_dfa_state ( m - > mem ) ;
ttree_insert ( tt , bs + 1 , ldfa ) ;
tmp =
_create_state_queue ( m - > scratch ,
ldfa , bs ) ;
if ( ! h )
h = t = tmp ;
else {
t - > next = tmp ;
t = tmp ;
}
count + + ;
}
dfa - > lookup [ a ] = ldfa ;
set_bits = 0 ;
dm_bit_clear_all ( bs ) ;
}
}
}
log_debug ( " Matcher built with %d dfa states " , count ) ;
return 1 ;
}
struct dm_regex * dm_regex_create ( struct dm_pool * mem , const char * * patterns ,
unsigned num_patterns )
{
char * all , * ptr ;
int i ;
size_t len = 0 ;
struct rx_node * rx ;
struct dm_pool * scratch = dm_pool_create ( " regex matcher " , 10 * 1024 ) ;
struct dm_regex * m ;
if ( ! scratch )
return_NULL ;
if ( ! ( m = dm_pool_alloc ( mem , sizeof ( * m ) ) ) ) {
dm_pool_destroy ( scratch ) ;
return_NULL ;
}
memset ( m , 0 , sizeof ( * m ) ) ;
/* join the regexps together, delimiting with zero */
for ( i = 0 ; i < num_patterns ; i + + )
len + = strlen ( patterns [ i ] ) + 8 ;
ptr = all = dm_pool_alloc ( scratch , len + 1 ) ;
if ( ! all )
goto_bad ;
for ( i = 0 ; i < num_patterns ; i + + ) {
ptr + = sprintf ( ptr , " (.*(%s)%c) " , patterns [ i ] , TARGET_TRANS ) ;
if ( i < ( num_patterns - 1 ) )
* ptr + + = ' | ' ;
}
/* parse this expression */
if ( ! ( rx = rx_parse_tok ( scratch , all , ptr ) ) ) {
log_error ( " Couldn't parse regex " ) ;
goto bad ;
}
m - > mem = mem ;
m - > scratch = scratch ;
m - > num_nodes = _count_nodes ( rx ) ;
m - > nodes = dm_pool_alloc ( scratch , sizeof ( * m - > nodes ) * m - > num_nodes ) ;
if ( ! m - > nodes )
goto_bad ;
_fill_table ( m , rx ) ;
_create_bitsets ( m ) ;
_calc_functions ( m ) ;
_calc_states ( m , rx ) ;
dm_pool_destroy ( scratch ) ;
m - > scratch = NULL ;
return m ;
bad :
dm_pool_destroy ( scratch ) ;
dm_pool_free ( mem , m ) ;
return NULL ;
}
static struct dfa_state * _step_matcher ( int c , struct dfa_state * cs , int * r )
{
if ( ! ( cs = cs - > lookup [ ( unsigned char ) c ] ) )
return NULL ;
if ( cs - > final & & ( cs - > final > * r ) )
* r = cs - > final ;
return cs ;
}
int dm_regex_match ( struct dm_regex * regex , const char * s )
{
struct dfa_state * cs = regex - > start ;
int r = 0 ;
if ( ! ( cs = _step_matcher ( HAT_CHAR , cs , & r ) ) )
goto out ;
for ( ; * s ; s + + )
if ( ! ( cs = _step_matcher ( * s , cs , & r ) ) )
goto out ;
_step_matcher ( DOLLAR_CHAR , cs , & r ) ;
out :
/* subtract 1 to get back to zero index */
return r - 1 ;
}