mirror of
git://sourceware.org/git/lvm2.git
synced 2025-01-02 01:18:26 +03:00
[REGEX] reduce the number of charset nodes that are produced
This commit is contained in:
parent
2ef5d30611
commit
529edb1853
@ -32,8 +32,11 @@ struct state_queue {
|
||||
struct dm_regex { /* Instance variables for the lexer */
|
||||
struct dfa_state *start;
|
||||
unsigned num_nodes;
|
||||
unsigned num_charsets;
|
||||
int nodes_entered;
|
||||
struct rx_node **nodes;
|
||||
int charsets_entered;
|
||||
struct rx_node **charsets;
|
||||
struct dm_pool *scratch, *mem;
|
||||
};
|
||||
|
||||
@ -50,6 +53,33 @@ static int _count_nodes(struct rx_node *rx)
|
||||
return r;
|
||||
}
|
||||
|
||||
static unsigned _count_charsets(struct rx_node *rx)
|
||||
{
|
||||
if (rx->type == CHARSET)
|
||||
return 1;
|
||||
|
||||
return (rx->left ? _count_charsets(rx->left) : 0) +
|
||||
(rx->right ? _count_charsets(rx->right) : 0);
|
||||
}
|
||||
|
||||
static void _enumerate_charsets_internal(struct rx_node *rx, unsigned *i)
|
||||
{
|
||||
if (rx->type == CHARSET)
|
||||
rx->charset_index = (*i)++;
|
||||
else {
|
||||
if (rx->left)
|
||||
_enumerate_charsets_internal(rx->left, i);
|
||||
if (rx->right)
|
||||
_enumerate_charsets_internal(rx->right, i);
|
||||
}
|
||||
}
|
||||
|
||||
static void _enumerate_charsets(struct rx_node *rx)
|
||||
{
|
||||
unsigned i = 0;
|
||||
_enumerate_charsets_internal(rx, &i);
|
||||
}
|
||||
|
||||
static void _fill_table(struct dm_regex *m, struct rx_node *rx)
|
||||
{
|
||||
assert((rx->type != OR) || (rx->left && rx->right));
|
||||
@ -61,6 +91,8 @@ static void _fill_table(struct dm_regex *m, struct rx_node *rx)
|
||||
_fill_table(m, rx->right);
|
||||
|
||||
m->nodes[m->nodes_entered++] = rx;
|
||||
if (rx->type == CHARSET)
|
||||
m->charsets[m->charsets_entered++] = rx;
|
||||
}
|
||||
|
||||
static void _create_bitsets(struct dm_regex *m)
|
||||
@ -69,9 +101,9 @@ static void _create_bitsets(struct dm_regex *m)
|
||||
|
||||
for (i = 0; i < m->num_nodes; i++) {
|
||||
struct rx_node *n = m->nodes[i];
|
||||
n->firstpos = dm_bitset_create(m->scratch, m->num_nodes);
|
||||
n->lastpos = dm_bitset_create(m->scratch, m->num_nodes);
|
||||
n->followpos = dm_bitset_create(m->scratch, m->num_nodes);
|
||||
n->firstpos = dm_bitset_create(m->scratch, m->num_charsets);
|
||||
n->lastpos = dm_bitset_create(m->scratch, m->num_charsets);
|
||||
n->followpos = dm_bitset_create(m->scratch, m->num_charsets);
|
||||
}
|
||||
}
|
||||
|
||||
@ -85,7 +117,7 @@ static void _calc_functions(struct dm_regex *m)
|
||||
c1 = rx->left;
|
||||
c2 = rx->right;
|
||||
|
||||
if (dm_bit(rx->charset, TARGET_TRANS))
|
||||
if (rx->type == CHARSET && dm_bit(rx->charset, TARGET_TRANS))
|
||||
rx->final = final++;
|
||||
|
||||
switch (rx->type) {
|
||||
@ -125,8 +157,8 @@ static void _calc_functions(struct dm_regex *m)
|
||||
break;
|
||||
|
||||
case CHARSET:
|
||||
dm_bit_set(rx->firstpos, i);
|
||||
dm_bit_set(rx->lastpos, i);
|
||||
dm_bit_set(rx->firstpos, rx->charset_index);
|
||||
dm_bit_set(rx->lastpos, rx->charset_index);
|
||||
rx->nullable = 0;
|
||||
break;
|
||||
|
||||
@ -141,24 +173,22 @@ static void _calc_functions(struct dm_regex *m)
|
||||
*/
|
||||
switch (rx->type) {
|
||||
case CAT:
|
||||
for (j = 0; j < m->num_nodes; j++) {
|
||||
if (dm_bit(c1->lastpos, j)) {
|
||||
struct rx_node *n = m->nodes[j];
|
||||
for (j = 0; j < m->num_charsets; j++) {
|
||||
struct rx_node *n = m->charsets[j];
|
||||
if (dm_bit(c1->lastpos, j))
|
||||
dm_bit_union(n->followpos,
|
||||
n->followpos, c2->firstpos);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PLUS:
|
||||
case STAR:
|
||||
for (j = 0; j < m->num_nodes; j++) {
|
||||
if (dm_bit(rx->lastpos, j)) {
|
||||
struct rx_node *n = m->nodes[j];
|
||||
for (j = 0; j < m->num_charsets; j++) {
|
||||
struct rx_node *n = m->charsets[j];
|
||||
if (dm_bit(rx->lastpos, j))
|
||||
dm_bit_union(n->followpos,
|
||||
n->followpos, rx->firstpos);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -189,7 +219,7 @@ static struct state_queue *_create_state_queue(struct dm_pool *mem,
|
||||
|
||||
static int _calc_states(struct dm_regex *m, struct rx_node *rx)
|
||||
{
|
||||
unsigned iwidth = (m->num_nodes / DM_BITS_PER_INT) + 1;
|
||||
unsigned iwidth = (m->num_charsets / DM_BITS_PER_INT) + 1;
|
||||
struct ttree *tt = ttree_create(m->scratch, iwidth);
|
||||
struct state_queue *h, *t, *tmp;
|
||||
struct dfa_state *dfa, *ldfa;
|
||||
@ -199,9 +229,26 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx)
|
||||
if (!tt)
|
||||
return_0;
|
||||
|
||||
if (!(bs = dm_bitset_create(m->scratch, m->num_nodes)))
|
||||
if (!(bs = dm_bitset_create(m->scratch, m->num_charsets)))
|
||||
return_0;
|
||||
|
||||
/* build some char maps */
|
||||
dm_bitset_t charmap[256];
|
||||
for (a = 0; a < 256; a++) {
|
||||
charmap[a] = dm_bitset_create(m->scratch, m->num_charsets);
|
||||
if (!charmap[a])
|
||||
return_0;
|
||||
}
|
||||
|
||||
for (i = 0; i < m->num_nodes; i++) {
|
||||
struct rx_node *n = m->nodes[i];
|
||||
if (n->type == CHARSET) {
|
||||
for (a = dm_bit_get_first(n->charset);
|
||||
a >= 0; a = dm_bit_get_next(n->charset, a))
|
||||
dm_bit_set(charmap[a], n->charset_index);
|
||||
}
|
||||
}
|
||||
|
||||
/* create first state */
|
||||
dfa = _create_dfa_state(m->mem);
|
||||
m->start = dfa;
|
||||
@ -209,8 +256,9 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx)
|
||||
|
||||
/* prime the queue */
|
||||
h = t = _create_state_queue(m->scratch, dfa, rx->firstpos);
|
||||
dm_bitset_t dfa_copy = dm_bitset_create(m->scratch, m->num_charsets);
|
||||
while (h) {
|
||||
int elems, j, idx[h->bits[0]];
|
||||
int elems, idx[h->bits[0]];
|
||||
|
||||
/* pop state off front of the queue */
|
||||
dfa = h->s;
|
||||
@ -227,18 +275,18 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx)
|
||||
idx[elems++] = i;
|
||||
|
||||
for (a = 0; a < 256; a++) {
|
||||
dm_bit_and(dfa_copy, charmap[a], dfa_bits);
|
||||
|
||||
/* iterate through all the states in firstpos */
|
||||
for (j = 0; j < elems; j++) {
|
||||
i = idx[j];
|
||||
if (dm_bit(m->nodes[i]->charset, a)) {
|
||||
for (i = dm_bit_get_first(dfa_copy);
|
||||
i >= 0; i = dm_bit_get_next(dfa_copy, i)) {
|
||||
if (a == TARGET_TRANS)
|
||||
dfa->final = m->nodes[i]->final;
|
||||
dfa->final = m->charsets[i]->final;
|
||||
|
||||
dm_bit_union(bs, bs,
|
||||
m->nodes[i]->followpos);
|
||||
m->charsets[i]->followpos);
|
||||
set_bits = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (set_bits) {
|
||||
ldfa = ttree_lookup(tt, bs + 1);
|
||||
@ -314,11 +362,16 @@ struct dm_regex *dm_regex_create(struct dm_pool *mem, const char **patterns,
|
||||
m->mem = mem;
|
||||
m->scratch = scratch;
|
||||
m->num_nodes = _count_nodes(rx);
|
||||
m->num_charsets = _count_charsets(rx);
|
||||
_enumerate_charsets(rx);
|
||||
m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes);
|
||||
|
||||
if (!m->nodes)
|
||||
goto_bad;
|
||||
|
||||
m->charsets = dm_pool_alloc(scratch, sizeof(*m->charsets) * m->num_charsets);
|
||||
if (!m->charsets)
|
||||
goto_bad;
|
||||
|
||||
_fill_table(m, rx);
|
||||
_create_bitsets(m);
|
||||
_calc_functions(m);
|
||||
|
@ -284,7 +284,7 @@ static struct rx_node *_node(struct dm_pool *mem, int type,
|
||||
struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
|
||||
|
||||
if (n) {
|
||||
if (!(n->charset = dm_bitset_create(mem, 256))) {
|
||||
if (type == CHARSET && !(n->charset = dm_bitset_create(mem, 256))) {
|
||||
dm_pool_free(mem, n);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -41,6 +41,7 @@ struct rx_node {
|
||||
struct rx_node *left, *right;
|
||||
|
||||
/* used to build the dfa for the toker */
|
||||
unsigned charset_index;
|
||||
int nullable, final;
|
||||
dm_bitset_t firstpos;
|
||||
dm_bitset_t lastpos;
|
||||
|
Loading…
Reference in New Issue
Block a user