/*
 * Copyright (C) 2001 Sistina Software (UK) Limited.
 *
 * This file is released under the LGPL.
 */
# include "matcher.h"
# include "parse_rx.h"
# include "log.h"
# include "ttree.h"
# include "bitset.h"
# include <string.h>
# include <stdio.h>
# include <assert.h>
/*
 * One state of the DFA that _calc_states() builds and matcher_run() walks.
 */
struct dfa_state {
/* 0 for a non-accepting state; otherwise copied from the 'final' number of a regex node */
int final ;
/* next state for each possible input byte; a null entry means there is no transition */
struct dfa_state * lookup [ 256 ] ;
} ;
/*
 * Entry in the singly linked work queue used by _calc_states(): a DFA
 * state that still needs its transitions computed.
 */
struct state_queue {
/* the DFA state this entry stands for */
struct dfa_state * s ;
/* private copy of the node-position set that defines the state */
bitset_t bits ;
/* following queue entry, or null at the tail */
struct state_queue * next ;
} ;
2002-04-24 22:20:51 +04:00
/*
 * Instance variables for the lexer.
 */
struct matcher {
	/* entry state of the DFA; assigned by _calc_states() */
	struct dfa_state *start;

	/* size of the nodes table */
	int num_nodes;
	/* how many slots of the nodes table _fill_table() has written */
	int nodes_entered;

	/* table of every regex tree node, filled by _fill_table() */
	struct rx_node **nodes;

	/* pool for data needed only while the matcher is being built */
	struct pool *scratch;
	/* pool the DFA states are allocated from */
	struct pool *mem;
};
/*
 * Marker character. matcher_create() appends it after each pattern;
 * _calc_functions() numbers every node whose charset contains it, and
 * _calc_states() copies that number into the DFA state's 'final' field.
 */
# define TARGET_TRANS '\0'
static int _count_nodes ( struct rx_node * rx )
{
int r = 1 ;
2002-04-24 22:20:51 +04:00
if ( rx - > left )
2001-10-19 18:36:57 +04:00
r + = _count_nodes ( rx - > left ) ;
2002-04-24 22:20:51 +04:00
if ( rx - > right )
2001-10-19 18:36:57 +04:00
r + = _count_nodes ( rx - > right ) ;
return r ;
}
static void _fill_table ( struct matcher * m , struct rx_node * rx )
{
assert ( ( rx - > type ! = OR ) | | ( rx - > left & & rx - > right ) ) ;
2002-04-24 22:20:51 +04:00
if ( rx - > left )
2001-10-19 18:36:57 +04:00
_fill_table ( m , rx - > left ) ;
2002-04-24 22:20:51 +04:00
if ( rx - > right )
2001-10-19 18:36:57 +04:00
_fill_table ( m , rx - > right ) ;
m - > nodes [ m - > nodes_entered + + ] = rx ;
}
static void _create_bitsets ( struct matcher * m )
{
int i ;
2002-04-24 22:20:51 +04:00
for ( i = 0 ; i < m - > num_nodes ; i + + ) {
2001-10-19 18:36:57 +04:00
struct rx_node * n = m - > nodes [ i ] ;
n - > firstpos = bitset_create ( m - > scratch , m - > num_nodes ) ;
n - > lastpos = bitset_create ( m - > scratch , m - > num_nodes ) ;
n - > followpos = bitset_create ( m - > scratch , m - > num_nodes ) ;
}
}
static void _calc_functions ( struct matcher * m )
{
int i , j , final = 1 ;
struct rx_node * rx , * c1 , * c2 ;
2002-04-24 22:20:51 +04:00
for ( i = 0 ; i < m - > num_nodes ; i + + ) {
2001-10-19 18:36:57 +04:00
rx = m - > nodes [ i ] ;
c1 = rx - > left ;
c2 = rx - > right ;
2002-04-24 22:20:51 +04:00
if ( bit ( rx - > charset , TARGET_TRANS ) )
2001-10-19 18:36:57 +04:00
rx - > final = final + + ;
2002-04-24 22:20:51 +04:00
switch ( rx - > type ) {
2001-10-19 18:36:57 +04:00
case CAT :
2002-04-24 22:20:51 +04:00
if ( c1 - > nullable )
2001-10-19 18:36:57 +04:00
bit_union ( rx - > firstpos ,
c1 - > firstpos , c2 - > firstpos ) ;
else
bit_copy ( rx - > firstpos , c1 - > firstpos ) ;
2002-04-24 22:20:51 +04:00
if ( c2 - > nullable )
2001-10-19 18:36:57 +04:00
bit_union ( rx - > lastpos ,
c1 - > lastpos , c2 - > lastpos ) ;
else
bit_copy ( rx - > lastpos , c2 - > lastpos ) ;
rx - > nullable = c1 - > nullable & & c2 - > nullable ;
break ;
case PLUS :
bit_copy ( rx - > firstpos , c1 - > firstpos ) ;
bit_copy ( rx - > lastpos , c1 - > lastpos ) ;
rx - > nullable = c1 - > nullable ;
break ;
case OR :
bit_union ( rx - > firstpos , c1 - > firstpos , c2 - > firstpos ) ;
bit_union ( rx - > lastpos , c1 - > lastpos , c2 - > lastpos ) ;
rx - > nullable = c1 - > nullable | | c2 - > nullable ;
break ;
case QUEST :
case STAR :
bit_copy ( rx - > firstpos , c1 - > firstpos ) ;
bit_copy ( rx - > lastpos , c1 - > lastpos ) ;
rx - > nullable = 1 ;
break ;
case CHARSET :
bit_set ( rx - > firstpos , i ) ;
bit_set ( rx - > lastpos , i ) ;
rx - > nullable = 0 ;
break ;
default :
2002-01-28 00:30:47 +03:00
log_error ( " Internal error: Unknown calc node type " ) ;
2001-10-19 18:36:57 +04:00
}
/*
* followpos has it ' s own switch
* because PLUS and STAR do the
* same thing .
*/
2002-04-24 22:20:51 +04:00
switch ( rx - > type ) {
2001-10-19 18:36:57 +04:00
case CAT :
2002-04-24 22:20:51 +04:00
for ( j = 0 ; j < m - > num_nodes ; j + + ) {
if ( bit ( c1 - > lastpos , j ) ) {
2001-10-19 18:36:57 +04:00
struct rx_node * n = m - > nodes [ j ] ;
bit_union ( n - > followpos ,
n - > followpos , c2 - > firstpos ) ;
}
}
break ;
case PLUS :
case STAR :
2002-04-24 22:20:51 +04:00
for ( j = 0 ; j < m - > num_nodes ; j + + ) {
if ( bit ( rx - > lastpos , j ) ) {
2001-10-19 18:36:57 +04:00
struct rx_node * n = m - > nodes [ j ] ;
bit_union ( n - > followpos ,
n - > followpos , rx - > firstpos ) ;
}
}
break ;
}
}
}
/*
 * Allocates one DFA state from mem with pool_zalloc() and returns it
 * (whatever pool_zalloc() returns, including a null pointer, is passed on).
 */
static inline struct dfa_state *_create_dfa_state(struct pool *mem)
{
	struct dfa_state *fresh = pool_zalloc(mem, sizeof(*fresh));

	return fresh;
}
/*
 * Builds a queue entry for 'dfa' that carries its own copy of 'bits'.
 *
 * mem:  pool the entry and its bitset are allocated from
 * dfa:  DFA state the entry refers to
 * bits: node-position set to copy into the entry
 *
 * Returns the new entry with next set to NULL, or NULL if either
 * allocation fails.
 */
static struct state_queue *_create_state_queue(struct pool *mem,
					       struct dfa_state *dfa,
					       bitset_t bits)
{
	struct state_queue *r = pool_alloc(mem, sizeof(*r));

	if (!r) {
		stack;
		return NULL;
	}

	r->s = dfa;
	r->next = NULL;

	/* first element is the size */
	r->bits = bitset_create(mem, bits[0]);
	if (!r->bits) {
		/* copying into a null bitset would crash; report failure */
		stack;
		return NULL;
	}

	bit_copy(r->bits, bits);

	return r;
}
static int _calc_states ( struct matcher * m , struct rx_node * rx )
{
int iwidth = ( m - > num_nodes / BITS_PER_INT ) + 1 ;
struct ttree * tt = ttree_create ( m - > scratch , iwidth ) ;
struct state_queue * h , * t , * tmp ;
struct dfa_state * dfa , * ldfa ;
int i , a , set_bits = 0 , count = 0 ;
bitset_t bs = bitset_create ( m - > scratch , m - > num_nodes ) , dfa_bits ;
if ( ! tt ) {
stack ;
return 0 ;
}
if ( ! bs ) {
stack ;
return 0 ;
}
/* create first state */
dfa = _create_dfa_state ( m - > mem ) ;
m - > start = dfa ;
o Filter for the dev cache that takes values from config file:
devices {
# first match is final, eg. /dev/ide/cdrom
# get's rejected due to the first pattern
filter=["r/cdrom/", # don't touch the music !
"a/hd[a-d][0-9]+/",
"a/ide/",
"a/sd/",
"a/md/",
"a|loop/[0-9]+|", # accept devfs style loop back
"r/loop/", # and reject old style
"a/dasd/",
"a/dac960/",
"a/nbd/",
"a/ida/",
"a/cciss/",
"a/ubd/",
"r/.*/"] # reject all others
}
Alasdair this is ready to roll into the tools now.
2001-10-19 22:20:37 +04:00
ttree_insert ( tt , rx - > firstpos + 1 , dfa ) ;
2001-10-19 18:36:57 +04:00
/* prime the queue */
h = t = _create_state_queue ( m - > scratch , dfa , rx - > firstpos ) ;
o Filter for the dev cache that takes values from config file:
devices {
# first match is final, eg. /dev/ide/cdrom
# get's rejected due to the first pattern
filter=["r/cdrom/", # don't touch the music !
"a/hd[a-d][0-9]+/",
"a/ide/",
"a/sd/",
"a/md/",
"a|loop/[0-9]+|", # accept devfs style loop back
"r/loop/", # and reject old style
"a/dasd/",
"a/dac960/",
"a/nbd/",
"a/ida/",
"a/cciss/",
"a/ubd/",
"r/.*/"] # reject all others
}
Alasdair this is ready to roll into the tools now.
2001-10-19 22:20:37 +04:00
while ( h ) {
2001-10-19 18:36:57 +04:00
/* pop state off front of the queue */
dfa = h - > s ;
dfa_bits = h - > bits ;
h = h - > next ;
/* iterate through all the inputs for this state */
bit_clear_all ( bs ) ;
2002-04-24 22:20:51 +04:00
for ( a = 0 ; a < 256 ; a + + ) {
2001-10-19 18:36:57 +04:00
/* iterate through all the states in firstpos */
2002-04-24 22:20:51 +04:00
for ( i = bit_get_first ( dfa_bits ) ;
i > = 0 ; i = bit_get_next ( dfa_bits , i ) ) {
if ( bit ( m - > nodes [ i ] - > charset , a ) ) {
if ( a = = TARGET_TRANS )
2001-10-19 18:36:57 +04:00
dfa - > final = m - > nodes [ i ] - > final ;
2002-04-24 22:20:51 +04:00
bit_union ( bs , bs ,
m - > nodes [ i ] - > followpos ) ;
2001-10-19 18:36:57 +04:00
set_bits = 1 ;
}
}
2002-04-24 22:20:51 +04:00
if ( set_bits ) {
2001-10-19 18:36:57 +04:00
ldfa = ttree_lookup ( tt , bs + 1 ) ;
2002-04-24 22:20:51 +04:00
if ( ! ldfa ) {
2001-10-19 18:36:57 +04:00
/* push */
ldfa = _create_dfa_state ( m - > mem ) ;
ttree_insert ( tt , bs + 1 , ldfa ) ;
2002-04-24 22:20:51 +04:00
tmp =
_create_state_queue ( m - > scratch ,
ldfa , bs ) ;
if ( ! h )
2001-10-19 18:36:57 +04:00
h = t = tmp ;
else {
t - > next = tmp ;
t = tmp ;
}
count + + ;
}
dfa - > lookup [ a ] = ldfa ;
set_bits = 0 ;
bit_clear_all ( bs ) ;
}
}
}
o Filter for the dev cache that takes values from config file:
devices {
# first match is final, eg. /dev/ide/cdrom
# get's rejected due to the first pattern
filter=["r/cdrom/", # don't touch the music !
"a/hd[a-d][0-9]+/",
"a/ide/",
"a/sd/",
"a/md/",
"a|loop/[0-9]+|", # accept devfs style loop back
"r/loop/", # and reject old style
"a/dasd/",
"a/dac960/",
"a/nbd/",
"a/ida/",
"a/cciss/",
"a/ubd/",
"r/.*/"] # reject all others
}
Alasdair this is ready to roll into the tools now.
2001-10-19 22:20:37 +04:00
log_debug ( " Matcher built with %d dfa states " , count ) ;
2001-10-19 18:36:57 +04:00
return 1 ;
}
2002-04-24 22:20:51 +04:00
struct matcher * matcher_create ( struct pool * mem , const char * * patterns , int num )
2001-10-19 18:36:57 +04:00
{
char * all , * ptr ;
int i , len = 0 ;
struct rx_node * rx ;
struct pool * scratch = pool_create ( 10 * 1024 ) ;
struct matcher * m ;
if ( ! scratch ) {
stack ;
return NULL ;
}
if ( ! ( m = pool_alloc ( mem , sizeof ( * m ) ) ) ) {
stack ;
return NULL ;
}
memset ( m , 0 , sizeof ( * m ) ) ;
/* join the regexps together, delimiting with zero */
2002-04-24 22:20:51 +04:00
for ( i = 0 ; i < num ; i + + )
2001-10-19 18:36:57 +04:00
len + = strlen ( patterns [ i ] ) + 8 ;
ptr = all = pool_alloc ( scratch , len + 1 ) ;
if ( ! all ) {
stack ;
goto bad ;
}
2002-04-24 22:20:51 +04:00
for ( i = 0 ; i < num ; i + + ) {
ptr + = sprintf ( ptr , " (.*(%s)%c) " , patterns [ i ] , TARGET_TRANS ) ;
if ( i < ( num - 1 ) )
2001-10-19 18:36:57 +04:00
* ptr + + = ' | ' ;
}
/* parse this expression */
2002-04-24 22:20:51 +04:00
if ( ! ( rx = rx_parse_tok ( scratch , all , ptr ) ) ) {
2002-01-28 00:30:47 +03:00
log_error ( " Couldn't parse regex " ) ;
2001-10-19 18:36:57 +04:00
goto bad ;
}
m - > mem = mem ;
m - > scratch = scratch ;
m - > num_nodes = _count_nodes ( rx ) ;
m - > nodes = pool_alloc ( scratch , sizeof ( * m - > nodes ) * m - > num_nodes ) ;
if ( ! m - > nodes ) {
stack ;
goto bad ;
}
_fill_table ( m , rx ) ;
_create_bitsets ( m ) ;
_calc_functions ( m ) ;
_calc_states ( m , rx ) ;
pool_destroy ( scratch ) ;
m - > scratch = NULL ;
return m ;
2002-04-24 22:20:51 +04:00
bad :
2001-10-19 18:36:57 +04:00
pool_destroy ( scratch ) ;
pool_destroy ( mem ) ;
return NULL ;
}
2002-08-29 19:05:16 +04:00
static inline struct dfa_state *
2002-08-29 18:46:30 +04:00
_step_matcher ( unsigned char c , struct dfa_state * cs , int * r )
{
if ( ! ( cs = cs - > lookup [ c ] ) )
return NULL ;
if ( cs - > final & & ( cs - > final > * r ) )
* r = cs - > final ;
return cs ;
}
o Filter for the dev cache that takes values from config file:
devices {
# first match is final, e.g. /dev/ide/cdrom
# gets rejected due to the first pattern
filter=["r/cdrom/", # don't touch the music !
"a/hd[a-d][0-9]+/",
"a/ide/",
"a/sd/",
"a/md/",
"a|loop/[0-9]+|", # accept devfs style loop back
"r/loop/", # and reject old style
"a/dasd/",
"a/dac960/",
"a/nbd/",
"a/ida/",
"a/cciss/",
"a/ubd/",
"r/.*/"] # reject all others
}
Alasdair this is ready to roll into the tools now.
2001-10-19 22:20:37 +04:00
int matcher_run ( struct matcher * m , const char * b )
2001-10-19 18:36:57 +04:00
{
struct dfa_state * cs = m - > start ;
o Filter for the dev cache that takes values from config file:
devices {
# first match is final, eg. /dev/ide/cdrom
# get's rejected due to the first pattern
filter=["r/cdrom/", # don't touch the music !
"a/hd[a-d][0-9]+/",
"a/ide/",
"a/sd/",
"a/md/",
"a|loop/[0-9]+|", # accept devfs style loop back
"r/loop/", # and reject old style
"a/dasd/",
"a/dac960/",
"a/nbd/",
"a/ida/",
"a/cciss/",
"a/ubd/",
"r/.*/"] # reject all others
}
Alasdair this is ready to roll into the tools now.
2001-10-19 22:20:37 +04:00
int r = 0 ;
2001-10-19 18:36:57 +04:00
2002-08-29 18:46:30 +04:00
if ( ! ( cs = _step_matcher ( HAT_CHAR , cs , & r ) ) )
goto out ;
2001-10-19 18:36:57 +04:00
2002-08-29 18:46:30 +04:00
for ( ; * b ; b + + )
if ( ! ( cs = _step_matcher ( * b , cs , & r ) ) )
goto out ;
2001-10-19 18:36:57 +04:00
2002-08-29 18:46:30 +04:00
_step_matcher ( DOLLAR_CHAR , cs , & r ) ;
2001-10-19 18:36:57 +04:00
2002-08-29 18:46:30 +04:00
out :
2001-10-19 18:36:57 +04:00
/* subtract 1 to get back to zero index */
o Filter for the dev cache that takes values from config file:
devices {
# first match is final, eg. /dev/ide/cdrom
# get's rejected due to the first pattern
filter=["r/cdrom/", # don't touch the music !
"a/hd[a-d][0-9]+/",
"a/ide/",
"a/sd/",
"a/md/",
"a|loop/[0-9]+|", # accept devfs style loop back
"r/loop/", # and reject old style
"a/dasd/",
"a/dac960/",
"a/nbd/",
"a/ida/",
"a/cciss/",
"a/ubd/",
"r/.*/"] # reject all others
}
Alasdair this is ready to roll into the tools now.
2001-10-19 22:20:37 +04:00
return r - 1 ;
2001-10-19 18:36:57 +04:00
}