2001-10-19 18:36:57 +04:00
/*
2004-03-30 23:35:44 +04:00
* Copyright ( C ) 2001 - 2004 Sistina Software , Inc . All rights reserved .
* Copyright ( C ) 2004 Red Hat , Inc . All rights reserved .
2001-10-19 18:36:57 +04:00
*
2004-03-30 23:35:44 +04:00
* This file is part of LVM2 .
*
* This copyrighted material is made available to anyone wishing to use ,
* modify , copy , or redistribute it subject to the terms and conditions
* of the GNU General Public License v .2 .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software Foundation ,
* Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA
2001-10-19 18:36:57 +04:00
*/
2002-11-18 17:01:16 +03:00
# include "lib.h"
2001-10-19 18:36:57 +04:00
# include "parse_rx.h"
struct parse_sp { /* scratch pad for the parsing process */
2005-10-17 03:03:59 +04:00
struct dm_pool * mem ;
2001-10-19 18:36:57 +04:00
int type ; /* token type, 0 indicates a charset */
2005-10-17 03:03:59 +04:00
dm_bitset_t charset ; /* The current charset */
2001-10-19 18:36:57 +04:00
const char * cursor ; /* where we are in the regex */
const char * rx_end ; /* 1pte for the expression being parsed */
} ;
2001-10-21 14:24:10 +04:00
/*
 * Forward declaration: _term() calls _or_term() to parse the contents of a
 * parenthesised group, while _or_term() itself is defined after _term().
 */
static struct rx_node * _or_term ( struct parse_sp * ps ) ;
2001-10-19 18:36:57 +04:00
2002-08-29 18:46:30 +04:00
static void _single_char ( struct parse_sp * ps , unsigned int c , const char * ptr )
{
ps - > type = 0 ;
ps - > cursor = ptr + 1 ;
2005-10-17 03:03:59 +04:00
dm_bit_clear_all ( ps - > charset ) ;
dm_bit_set ( ps - > charset , c ) ;
2002-08-29 18:46:30 +04:00
}
2001-10-19 18:36:57 +04:00
/*
* Get the next token from the regular expression .
* Returns : 1 success , 0 end of input , - 1 error .
*/
2006-04-19 19:33:07 +04:00
static int _rx_get_token ( struct parse_sp * ps )
2001-10-19 18:36:57 +04:00
{
int neg = 0 , range = 0 ;
char c , lc = 0 ;
const char * ptr = ps - > cursor ;
2002-04-24 22:20:51 +04:00
if ( ptr = = ps - > rx_end ) { /* end of input ? */
2001-10-19 18:36:57 +04:00
ps - > type = - 1 ;
return 0 ;
}
2002-04-24 22:20:51 +04:00
switch ( * ptr ) {
/* charsets and ncharsets */
2001-10-19 18:36:57 +04:00
case ' [ ' :
ptr + + ;
2002-04-24 22:20:51 +04:00
if ( * ptr = = ' ^ ' ) {
2005-10-17 03:03:59 +04:00
dm_bit_set_all ( ps - > charset ) ;
2001-10-19 18:36:57 +04:00
/* never transition on zero */
2005-10-17 03:03:59 +04:00
dm_bit_clear ( ps - > charset , 0 ) ;
2001-10-19 18:36:57 +04:00
neg = 1 ;
ptr + + ;
} else
2005-10-17 03:03:59 +04:00
dm_bit_clear_all ( ps - > charset ) ;
2001-10-19 18:36:57 +04:00
2002-04-24 22:20:51 +04:00
while ( ( ptr < ps - > rx_end ) & & ( * ptr ! = ' ] ' ) ) {
if ( * ptr = = ' \\ ' ) {
2001-10-19 18:36:57 +04:00
/* an escaped character */
ptr + + ;
2002-04-24 22:20:51 +04:00
switch ( * ptr ) {
case ' n ' :
c = ' \n ' ;
break ;
case ' r ' :
c = ' \r ' ;
break ;
case ' t ' :
c = ' \t ' ;
break ;
2001-10-19 18:36:57 +04:00
default :
c = * ptr ;
}
2002-04-24 22:20:51 +04:00
} else if ( * ptr = = ' - ' & & lc ) {
2001-10-19 18:36:57 +04:00
/* we've got a range on our hands */
range = 1 ;
ptr + + ;
2002-04-24 22:20:51 +04:00
if ( ptr = = ps - > rx_end ) {
2002-01-28 00:30:47 +03:00
log_error ( " Incomplete range "
2002-04-24 22:20:51 +04:00
" specification " ) ;
2001-10-19 18:36:57 +04:00
return - 1 ;
}
c = * ptr ;
} else
c = * ptr ;
2002-04-24 22:20:51 +04:00
if ( range ) {
2001-10-19 18:36:57 +04:00
/* add lc - c into the bitset */
2002-04-24 22:20:51 +04:00
if ( lc > c ) {
2001-10-19 18:36:57 +04:00
char tmp = c ;
c = lc ;
lc = tmp ;
}
2002-04-24 22:20:51 +04:00
for ( ; lc < = c ; lc + + ) {
if ( neg )
2005-10-17 03:03:59 +04:00
dm_bit_clear ( ps - > charset , lc ) ;
2001-10-19 18:36:57 +04:00
else
2005-10-17 03:03:59 +04:00
dm_bit_set ( ps - > charset , lc ) ;
2001-10-19 18:36:57 +04:00
}
range = 0 ;
} else {
/* add c into the bitset */
2002-04-24 22:20:51 +04:00
if ( neg )
2005-10-17 03:03:59 +04:00
dm_bit_clear ( ps - > charset , c ) ;
2001-10-19 18:36:57 +04:00
else
2005-10-17 03:03:59 +04:00
dm_bit_set ( ps - > charset , c ) ;
2001-10-19 18:36:57 +04:00
}
ptr + + ;
lc = c ;
}
2002-04-24 22:20:51 +04:00
if ( ptr > = ps - > rx_end ) {
2001-10-19 18:36:57 +04:00
ps - > type = - 1 ;
return - 1 ;
}
ps - > type = 0 ;
ps - > cursor = ptr + 1 ;
break ;
/* These characters are special, we just return their ASCII
codes as the type . Sorted into ascending order to help the
compiler */
case ' ( ' :
case ' ) ' :
case ' * ' :
case ' + ' :
case ' ? ' :
case ' | ' :
ps - > type = ( int ) * ptr ;
ps - > cursor = ptr + 1 ;
break ;
2002-08-29 18:46:30 +04:00
case ' ^ ' :
_single_char ( ps , HAT_CHAR , ptr ) ;
break ;
case ' $ ' :
_single_char ( ps , DOLLAR_CHAR , ptr ) ;
break ;
2001-10-19 18:36:57 +04:00
case ' . ' :
/* The 'all but newline' character set */
ps - > type = 0 ;
ps - > cursor = ptr + 1 ;
2005-10-17 03:03:59 +04:00
dm_bit_set_all ( ps - > charset ) ;
dm_bit_clear ( ps - > charset , ( int ) ' \n ' ) ;
dm_bit_clear ( ps - > charset , ( int ) ' \r ' ) ;
dm_bit_clear ( ps - > charset , 0 ) ;
2001-10-19 18:36:57 +04:00
break ;
case ' \\ ' :
/* escaped character */
ptr + + ;
2002-04-24 22:20:51 +04:00
if ( ptr > = ps - > rx_end ) {
2002-01-28 00:30:47 +03:00
log_error ( " Badly quoted character at end "
" of expression " ) ;
2001-10-19 18:36:57 +04:00
ps - > type = - 1 ;
return - 1 ;
}
ps - > type = 0 ;
ps - > cursor = ptr + 1 ;
2005-10-17 03:03:59 +04:00
dm_bit_clear_all ( ps - > charset ) ;
2002-04-24 22:20:51 +04:00
switch ( * ptr ) {
case ' n ' :
2005-10-17 03:03:59 +04:00
dm_bit_set ( ps - > charset , ( int ) ' \n ' ) ;
2002-04-24 22:20:51 +04:00
break ;
case ' r ' :
2005-10-17 03:03:59 +04:00
dm_bit_set ( ps - > charset , ( int ) ' \r ' ) ;
2002-04-24 22:20:51 +04:00
break ;
case ' t ' :
2005-10-17 03:03:59 +04:00
dm_bit_set ( ps - > charset , ( int ) ' \t ' ) ;
2002-04-24 22:20:51 +04:00
break ;
2001-10-19 18:36:57 +04:00
default :
2005-10-17 03:03:59 +04:00
dm_bit_set ( ps - > charset , ( int ) * ptr ) ;
2001-10-19 18:36:57 +04:00
}
break ;
default :
/* add a single character to the bitset */
ps - > type = 0 ;
ps - > cursor = ptr + 1 ;
2005-10-17 03:03:59 +04:00
dm_bit_clear_all ( ps - > charset ) ;
dm_bit_set ( ps - > charset , ( int ) * ptr ) ;
2001-10-19 18:36:57 +04:00
break ;
}
return 1 ;
}
2005-10-17 03:03:59 +04:00
static struct rx_node * _node ( struct dm_pool * mem , int type ,
2001-10-21 14:24:10 +04:00
struct rx_node * l , struct rx_node * r )
2001-10-19 18:36:57 +04:00
{
2005-10-17 03:03:59 +04:00
struct rx_node * n = dm_pool_zalloc ( mem , sizeof ( * n ) ) ;
2001-10-19 18:36:57 +04:00
if ( n ) {
2005-10-17 03:03:59 +04:00
if ( ! ( n - > charset = dm_bitset_create ( mem , 256 ) ) ) {
dm_pool_free ( mem , n ) ;
2001-10-19 18:36:57 +04:00
return NULL ;
}
n - > type = type ;
n - > left = l ;
n - > right = r ;
}
return n ;
}
static struct rx_node * _term ( struct parse_sp * ps )
{
struct rx_node * n ;
2002-04-24 22:20:51 +04:00
switch ( ps - > type ) {
2001-10-19 18:36:57 +04:00
case 0 :
2001-10-21 14:24:10 +04:00
if ( ! ( n = _node ( ps - > mem , CHARSET , NULL , NULL ) ) ) {
2001-10-19 18:36:57 +04:00
stack ;
return NULL ;
}
2005-10-17 03:03:59 +04:00
dm_bit_copy ( n - > charset , ps - > charset ) ;
2006-04-19 19:33:07 +04:00
_rx_get_token ( ps ) ; /* match charset */
2001-10-19 18:36:57 +04:00
break ;
case ' ( ' :
2006-04-19 19:33:07 +04:00
_rx_get_token ( ps ) ; /* match '(' */
2001-10-21 14:24:10 +04:00
n = _or_term ( ps ) ;
2002-04-24 22:20:51 +04:00
if ( ps - > type ! = ' ) ' ) {
2002-01-28 00:30:47 +03:00
log_error ( " missing ')' in regular expression " ) ;
2001-10-19 18:36:57 +04:00
return 0 ;
}
2006-04-19 19:33:07 +04:00
_rx_get_token ( ps ) ; /* match ')' */
2001-10-19 18:36:57 +04:00
break ;
default :
n = 0 ;
}
return n ;
}
static struct rx_node * _closure_term ( struct parse_sp * ps )
{
struct rx_node * l , * n ;
2002-04-24 22:20:51 +04:00
if ( ! ( l = _term ( ps ) ) )
2001-10-19 18:36:57 +04:00
return NULL ;
2001-10-21 14:24:10 +04:00
for ( ; ; ) {
2002-04-24 22:20:51 +04:00
switch ( ps - > type ) {
2001-10-21 14:24:10 +04:00
case ' * ' :
n = _node ( ps - > mem , STAR , l , NULL ) ;
break ;
2001-10-19 18:36:57 +04:00
2001-10-21 14:24:10 +04:00
case ' + ' :
n = _node ( ps - > mem , PLUS , l , NULL ) ;
break ;
2001-10-19 18:36:57 +04:00
2001-10-21 14:24:10 +04:00
case ' ? ' :
n = _node ( ps - > mem , QUEST , l , NULL ) ;
break ;
2001-10-19 18:36:57 +04:00
2001-10-21 14:24:10 +04:00
default :
return l ;
}
2001-10-19 18:36:57 +04:00
2001-10-21 14:24:10 +04:00
if ( ! n ) {
stack ;
return NULL ;
}
2006-04-19 19:33:07 +04:00
_rx_get_token ( ps ) ;
2001-10-21 14:24:10 +04:00
l = n ;
2001-10-19 18:36:57 +04:00
}
return n ;
}
static struct rx_node * _cat_term ( struct parse_sp * ps )
{
struct rx_node * l , * r , * n ;
if ( ! ( l = _closure_term ( ps ) ) )
return NULL ;
if ( ps - > type = = ' | ' )
return l ;
if ( ! ( r = _cat_term ( ps ) ) )
return l ;
2001-10-21 14:24:10 +04:00
if ( ! ( n = _node ( ps - > mem , CAT , l , r ) ) )
2001-10-19 18:36:57 +04:00
stack ;
return n ;
}
2001-10-21 14:24:10 +04:00
static struct rx_node * _or_term ( struct parse_sp * ps )
2001-10-19 18:36:57 +04:00
{
2001-10-21 14:24:10 +04:00
struct rx_node * l , * r , * n ;
2001-10-19 18:36:57 +04:00
2001-10-21 14:24:10 +04:00
if ( ! ( l = _cat_term ( ps ) ) )
return NULL ;
2001-10-19 18:36:57 +04:00
2001-10-21 14:24:10 +04:00
if ( ps - > type ! = ' | ' )
return l ;
2001-10-19 18:36:57 +04:00
2006-04-19 19:33:07 +04:00
_rx_get_token ( ps ) ; /* match '|' */
2001-10-19 18:36:57 +04:00
2001-10-21 14:24:10 +04:00
if ( ! ( r = _or_term ( ps ) ) ) {
2002-01-28 00:30:47 +03:00
log_error ( " Badly formed 'or' expression " ) ;
2001-10-21 14:24:10 +04:00
return NULL ;
2001-10-19 18:36:57 +04:00
}
2001-10-21 14:24:10 +04:00
if ( ! ( n = _node ( ps - > mem , OR , l , r ) ) )
stack ;
2001-10-19 18:36:57 +04:00
return n ;
}
2005-10-17 03:03:59 +04:00
struct rx_node * rx_parse_tok ( struct dm_pool * mem ,
2001-10-19 18:36:57 +04:00
const char * begin , const char * end )
{
struct rx_node * r ;
2005-10-17 03:03:59 +04:00
struct parse_sp * ps = dm_pool_zalloc ( mem , sizeof ( * ps ) ) ;
2001-10-19 18:36:57 +04:00
if ( ! ps ) {
stack ;
return NULL ;
}
ps - > mem = mem ;
2005-10-17 03:03:59 +04:00
ps - > charset = dm_bitset_create ( mem , 256 ) ;
2001-10-19 18:36:57 +04:00
ps - > cursor = begin ;
ps - > rx_end = end ;
2006-04-19 19:33:07 +04:00
_rx_get_token ( ps ) ; /* load the first token */
2001-10-21 14:24:10 +04:00
if ( ! ( r = _or_term ( ps ) ) ) {
2002-01-28 00:30:47 +03:00
log_error ( " Parse error in regex " ) ;
2005-10-17 03:03:59 +04:00
dm_pool_free ( mem , ps ) ;
2001-10-21 14:24:10 +04:00
}
2001-10-19 18:36:57 +04:00
return r ;
}
2005-10-17 03:03:59 +04:00
/*
 * Parse a NUL-terminated regular expression.  This is a convenience
 * wrapper that passes the whole string to rx_parse_tok() and returns
 * whatever that call returns.
 */
struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str)
{
	/* the terminating NUL marks one past the end of the expression */
	const char *end = str + strlen(str);

	return rx_parse_tok(mem, str, end);
}