mirror of
git://sourceware.org/git/lvm2.git
synced 2025-01-04 09:18:36 +03:00
[REGEX] reduce the number of charset nodes that are produced
This commit is contained in:
parent
2ef5d30611
commit
529edb1853
@ -32,8 +32,11 @@ struct state_queue {
|
|||||||
struct dm_regex { /* Instance variables for the lexer */
|
struct dm_regex { /* Instance variables for the lexer */
|
||||||
struct dfa_state *start;
|
struct dfa_state *start;
|
||||||
unsigned num_nodes;
|
unsigned num_nodes;
|
||||||
|
unsigned num_charsets;
|
||||||
int nodes_entered;
|
int nodes_entered;
|
||||||
struct rx_node **nodes;
|
struct rx_node **nodes;
|
||||||
|
int charsets_entered;
|
||||||
|
struct rx_node **charsets;
|
||||||
struct dm_pool *scratch, *mem;
|
struct dm_pool *scratch, *mem;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -50,6 +53,33 @@ static int _count_nodes(struct rx_node *rx)
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned _count_charsets(struct rx_node *rx)
|
||||||
|
{
|
||||||
|
if (rx->type == CHARSET)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return (rx->left ? _count_charsets(rx->left) : 0) +
|
||||||
|
(rx->right ? _count_charsets(rx->right) : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void _enumerate_charsets_internal(struct rx_node *rx, unsigned *i)
|
||||||
|
{
|
||||||
|
if (rx->type == CHARSET)
|
||||||
|
rx->charset_index = (*i)++;
|
||||||
|
else {
|
||||||
|
if (rx->left)
|
||||||
|
_enumerate_charsets_internal(rx->left, i);
|
||||||
|
if (rx->right)
|
||||||
|
_enumerate_charsets_internal(rx->right, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void _enumerate_charsets(struct rx_node *rx)
|
||||||
|
{
|
||||||
|
unsigned i = 0;
|
||||||
|
_enumerate_charsets_internal(rx, &i);
|
||||||
|
}
|
||||||
|
|
||||||
static void _fill_table(struct dm_regex *m, struct rx_node *rx)
|
static void _fill_table(struct dm_regex *m, struct rx_node *rx)
|
||||||
{
|
{
|
||||||
assert((rx->type != OR) || (rx->left && rx->right));
|
assert((rx->type != OR) || (rx->left && rx->right));
|
||||||
@ -61,6 +91,8 @@ static void _fill_table(struct dm_regex *m, struct rx_node *rx)
|
|||||||
_fill_table(m, rx->right);
|
_fill_table(m, rx->right);
|
||||||
|
|
||||||
m->nodes[m->nodes_entered++] = rx;
|
m->nodes[m->nodes_entered++] = rx;
|
||||||
|
if (rx->type == CHARSET)
|
||||||
|
m->charsets[m->charsets_entered++] = rx;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _create_bitsets(struct dm_regex *m)
|
static void _create_bitsets(struct dm_regex *m)
|
||||||
@ -69,9 +101,9 @@ static void _create_bitsets(struct dm_regex *m)
|
|||||||
|
|
||||||
for (i = 0; i < m->num_nodes; i++) {
|
for (i = 0; i < m->num_nodes; i++) {
|
||||||
struct rx_node *n = m->nodes[i];
|
struct rx_node *n = m->nodes[i];
|
||||||
n->firstpos = dm_bitset_create(m->scratch, m->num_nodes);
|
n->firstpos = dm_bitset_create(m->scratch, m->num_charsets);
|
||||||
n->lastpos = dm_bitset_create(m->scratch, m->num_nodes);
|
n->lastpos = dm_bitset_create(m->scratch, m->num_charsets);
|
||||||
n->followpos = dm_bitset_create(m->scratch, m->num_nodes);
|
n->followpos = dm_bitset_create(m->scratch, m->num_charsets);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -85,7 +117,7 @@ static void _calc_functions(struct dm_regex *m)
|
|||||||
c1 = rx->left;
|
c1 = rx->left;
|
||||||
c2 = rx->right;
|
c2 = rx->right;
|
||||||
|
|
||||||
if (dm_bit(rx->charset, TARGET_TRANS))
|
if (rx->type == CHARSET && dm_bit(rx->charset, TARGET_TRANS))
|
||||||
rx->final = final++;
|
rx->final = final++;
|
||||||
|
|
||||||
switch (rx->type) {
|
switch (rx->type) {
|
||||||
@ -125,8 +157,8 @@ static void _calc_functions(struct dm_regex *m)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case CHARSET:
|
case CHARSET:
|
||||||
dm_bit_set(rx->firstpos, i);
|
dm_bit_set(rx->firstpos, rx->charset_index);
|
||||||
dm_bit_set(rx->lastpos, i);
|
dm_bit_set(rx->lastpos, rx->charset_index);
|
||||||
rx->nullable = 0;
|
rx->nullable = 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -141,24 +173,22 @@ static void _calc_functions(struct dm_regex *m)
|
|||||||
*/
|
*/
|
||||||
switch (rx->type) {
|
switch (rx->type) {
|
||||||
case CAT:
|
case CAT:
|
||||||
for (j = 0; j < m->num_nodes; j++) {
|
for (j = 0; j < m->num_charsets; j++) {
|
||||||
if (dm_bit(c1->lastpos, j)) {
|
struct rx_node *n = m->charsets[j];
|
||||||
struct rx_node *n = m->nodes[j];
|
if (dm_bit(c1->lastpos, j))
|
||||||
dm_bit_union(n->followpos,
|
dm_bit_union(n->followpos,
|
||||||
n->followpos, c2->firstpos);
|
n->followpos, c2->firstpos);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PLUS:
|
case PLUS:
|
||||||
case STAR:
|
case STAR:
|
||||||
for (j = 0; j < m->num_nodes; j++) {
|
for (j = 0; j < m->num_charsets; j++) {
|
||||||
if (dm_bit(rx->lastpos, j)) {
|
struct rx_node *n = m->charsets[j];
|
||||||
struct rx_node *n = m->nodes[j];
|
if (dm_bit(rx->lastpos, j))
|
||||||
dm_bit_union(n->followpos,
|
dm_bit_union(n->followpos,
|
||||||
n->followpos, rx->firstpos);
|
n->followpos, rx->firstpos);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -189,7 +219,7 @@ static struct state_queue *_create_state_queue(struct dm_pool *mem,
|
|||||||
|
|
||||||
static int _calc_states(struct dm_regex *m, struct rx_node *rx)
|
static int _calc_states(struct dm_regex *m, struct rx_node *rx)
|
||||||
{
|
{
|
||||||
unsigned iwidth = (m->num_nodes / DM_BITS_PER_INT) + 1;
|
unsigned iwidth = (m->num_charsets / DM_BITS_PER_INT) + 1;
|
||||||
struct ttree *tt = ttree_create(m->scratch, iwidth);
|
struct ttree *tt = ttree_create(m->scratch, iwidth);
|
||||||
struct state_queue *h, *t, *tmp;
|
struct state_queue *h, *t, *tmp;
|
||||||
struct dfa_state *dfa, *ldfa;
|
struct dfa_state *dfa, *ldfa;
|
||||||
@ -199,9 +229,26 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx)
|
|||||||
if (!tt)
|
if (!tt)
|
||||||
return_0;
|
return_0;
|
||||||
|
|
||||||
if (!(bs = dm_bitset_create(m->scratch, m->num_nodes)))
|
if (!(bs = dm_bitset_create(m->scratch, m->num_charsets)))
|
||||||
return_0;
|
return_0;
|
||||||
|
|
||||||
|
/* build some char maps */
|
||||||
|
dm_bitset_t charmap[256];
|
||||||
|
for (a = 0; a < 256; a++) {
|
||||||
|
charmap[a] = dm_bitset_create(m->scratch, m->num_charsets);
|
||||||
|
if (!charmap[a])
|
||||||
|
return_0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < m->num_nodes; i++) {
|
||||||
|
struct rx_node *n = m->nodes[i];
|
||||||
|
if (n->type == CHARSET) {
|
||||||
|
for (a = dm_bit_get_first(n->charset);
|
||||||
|
a >= 0; a = dm_bit_get_next(n->charset, a))
|
||||||
|
dm_bit_set(charmap[a], n->charset_index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* create first state */
|
/* create first state */
|
||||||
dfa = _create_dfa_state(m->mem);
|
dfa = _create_dfa_state(m->mem);
|
||||||
m->start = dfa;
|
m->start = dfa;
|
||||||
@ -209,8 +256,9 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx)
|
|||||||
|
|
||||||
/* prime the queue */
|
/* prime the queue */
|
||||||
h = t = _create_state_queue(m->scratch, dfa, rx->firstpos);
|
h = t = _create_state_queue(m->scratch, dfa, rx->firstpos);
|
||||||
|
dm_bitset_t dfa_copy = dm_bitset_create(m->scratch, m->num_charsets);
|
||||||
while (h) {
|
while (h) {
|
||||||
int elems, j, idx[h->bits[0]];
|
int elems, idx[h->bits[0]];
|
||||||
|
|
||||||
/* pop state off front of the queue */
|
/* pop state off front of the queue */
|
||||||
dfa = h->s;
|
dfa = h->s;
|
||||||
@ -227,18 +275,18 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx)
|
|||||||
idx[elems++] = i;
|
idx[elems++] = i;
|
||||||
|
|
||||||
for (a = 0; a < 256; a++) {
|
for (a = 0; a < 256; a++) {
|
||||||
|
dm_bit_and(dfa_copy, charmap[a], dfa_bits);
|
||||||
|
|
||||||
/* iterate through all the states in firstpos */
|
/* iterate through all the states in firstpos */
|
||||||
for (j = 0; j < elems; j++) {
|
for (i = dm_bit_get_first(dfa_copy);
|
||||||
i = idx[j];
|
i >= 0; i = dm_bit_get_next(dfa_copy, i)) {
|
||||||
if (dm_bit(m->nodes[i]->charset, a)) {
|
|
||||||
if (a == TARGET_TRANS)
|
if (a == TARGET_TRANS)
|
||||||
dfa->final = m->nodes[i]->final;
|
dfa->final = m->charsets[i]->final;
|
||||||
|
|
||||||
dm_bit_union(bs, bs,
|
dm_bit_union(bs, bs,
|
||||||
m->nodes[i]->followpos);
|
m->charsets[i]->followpos);
|
||||||
set_bits = 1;
|
set_bits = 1;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (set_bits) {
|
if (set_bits) {
|
||||||
ldfa = ttree_lookup(tt, bs + 1);
|
ldfa = ttree_lookup(tt, bs + 1);
|
||||||
@ -314,11 +362,16 @@ struct dm_regex *dm_regex_create(struct dm_pool *mem, const char **patterns,
|
|||||||
m->mem = mem;
|
m->mem = mem;
|
||||||
m->scratch = scratch;
|
m->scratch = scratch;
|
||||||
m->num_nodes = _count_nodes(rx);
|
m->num_nodes = _count_nodes(rx);
|
||||||
|
m->num_charsets = _count_charsets(rx);
|
||||||
|
_enumerate_charsets(rx);
|
||||||
m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes);
|
m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes);
|
||||||
|
|
||||||
if (!m->nodes)
|
if (!m->nodes)
|
||||||
goto_bad;
|
goto_bad;
|
||||||
|
|
||||||
|
m->charsets = dm_pool_alloc(scratch, sizeof(*m->charsets) * m->num_charsets);
|
||||||
|
if (!m->charsets)
|
||||||
|
goto_bad;
|
||||||
|
|
||||||
_fill_table(m, rx);
|
_fill_table(m, rx);
|
||||||
_create_bitsets(m);
|
_create_bitsets(m);
|
||||||
_calc_functions(m);
|
_calc_functions(m);
|
||||||
|
@ -284,7 +284,7 @@ static struct rx_node *_node(struct dm_pool *mem, int type,
|
|||||||
struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
|
struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
|
||||||
|
|
||||||
if (n) {
|
if (n) {
|
||||||
if (!(n->charset = dm_bitset_create(mem, 256))) {
|
if (type == CHARSET && !(n->charset = dm_bitset_create(mem, 256))) {
|
||||||
dm_pool_free(mem, n);
|
dm_pool_free(mem, n);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -41,6 +41,7 @@ struct rx_node {
|
|||||||
struct rx_node *left, *right;
|
struct rx_node *left, *right;
|
||||||
|
|
||||||
/* used to build the dfa for the toker */
|
/* used to build the dfa for the toker */
|
||||||
|
unsigned charset_index;
|
||||||
int nullable, final;
|
int nullable, final;
|
||||||
dm_bitset_t firstpos;
|
dm_bitset_t firstpos;
|
||||||
dm_bitset_t lastpos;
|
dm_bitset_t lastpos;
|
||||||
|
Loading…
Reference in New Issue
Block a user