summaryrefslogtreecommitdiff
path: root/libdm/regex
diff options
context:
space:
mode:
authorJoe Thornber <thornber@redhat.com>2010-07-21 11:58:49 +0000
committerJoe Thornber <thornber@redhat.com>2010-07-21 11:58:49 +0000
commit529edb1853f9b1a3199eacbed8dfabc14903ee11 (patch)
treeacb13a79b480fe1065a89b741636f57c5690c7a4 /libdm/regex
parent2ef5d3061164811c2efbbdd14a62c4bd714f874f (diff)
downloadlvm2-529edb1853f9b1a3199eacbed8dfabc14903ee11.tar.gz
[REGEX] reduce the number of charset nodes that are produced
Diffstat (limited to 'libdm/regex')
-rw-r--r--libdm/regex/matcher.c117
-rw-r--r--libdm/regex/parse_rx.c2
-rw-r--r--libdm/regex/parse_rx.h1
3 files changed, 87 insertions, 33 deletions
diff --git a/libdm/regex/matcher.c b/libdm/regex/matcher.c
index d8d856c84..07ba9e4a3 100644
--- a/libdm/regex/matcher.c
+++ b/libdm/regex/matcher.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
*
* This file is part of the device-mapper userspace tools.
@@ -32,8 +32,11 @@ struct state_queue {
struct dm_regex { /* Instance variables for the lexer */
struct dfa_state *start;
unsigned num_nodes;
+ unsigned num_charsets;
int nodes_entered;
struct rx_node **nodes;
+ int charsets_entered;
+ struct rx_node **charsets;
struct dm_pool *scratch, *mem;
};
@@ -50,6 +53,33 @@ static int _count_nodes(struct rx_node *rx)
return r;
}
+static unsigned _count_charsets(struct rx_node *rx)
+{
+ if (rx->type == CHARSET)
+ return 1;
+
+ return (rx->left ? _count_charsets(rx->left) : 0) +
+ (rx->right ? _count_charsets(rx->right) : 0);
+}
+
+static void _enumerate_charsets_internal(struct rx_node *rx, unsigned *i)
+{
+ if (rx->type == CHARSET)
+ rx->charset_index = (*i)++;
+ else {
+ if (rx->left)
+ _enumerate_charsets_internal(rx->left, i);
+ if (rx->right)
+ _enumerate_charsets_internal(rx->right, i);
+ }
+}
+
+static void _enumerate_charsets(struct rx_node *rx)
+{
+ unsigned i = 0;
+ _enumerate_charsets_internal(rx, &i);
+}
+
static void _fill_table(struct dm_regex *m, struct rx_node *rx)
{
assert((rx->type != OR) || (rx->left && rx->right));
@@ -61,6 +91,8 @@ static void _fill_table(struct dm_regex *m, struct rx_node *rx)
_fill_table(m, rx->right);
m->nodes[m->nodes_entered++] = rx;
+ if (rx->type == CHARSET)
+ m->charsets[m->charsets_entered++] = rx;
}
static void _create_bitsets(struct dm_regex *m)
@@ -69,9 +101,9 @@ static void _create_bitsets(struct dm_regex *m)
for (i = 0; i < m->num_nodes; i++) {
struct rx_node *n = m->nodes[i];
- n->firstpos = dm_bitset_create(m->scratch, m->num_nodes);
- n->lastpos = dm_bitset_create(m->scratch, m->num_nodes);
- n->followpos = dm_bitset_create(m->scratch, m->num_nodes);
+ n->firstpos = dm_bitset_create(m->scratch, m->num_charsets);
+ n->lastpos = dm_bitset_create(m->scratch, m->num_charsets);
+ n->followpos = dm_bitset_create(m->scratch, m->num_charsets);
}
}
@@ -85,7 +117,7 @@ static void _calc_functions(struct dm_regex *m)
c1 = rx->left;
c2 = rx->right;
- if (dm_bit(rx->charset, TARGET_TRANS))
+ if (rx->type == CHARSET && dm_bit(rx->charset, TARGET_TRANS))
rx->final = final++;
switch (rx->type) {
@@ -125,8 +157,8 @@ static void _calc_functions(struct dm_regex *m)
break;
case CHARSET:
- dm_bit_set(rx->firstpos, i);
- dm_bit_set(rx->lastpos, i);
+ dm_bit_set(rx->firstpos, rx->charset_index);
+ dm_bit_set(rx->lastpos, rx->charset_index);
rx->nullable = 0;
break;
@@ -141,23 +173,21 @@ static void _calc_functions(struct dm_regex *m)
*/
switch (rx->type) {
case CAT:
- for (j = 0; j < m->num_nodes; j++) {
- if (dm_bit(c1->lastpos, j)) {
- struct rx_node *n = m->nodes[j];
+ for (j = 0; j < m->num_charsets; j++) {
+ struct rx_node *n = m->charsets[j];
+ if (dm_bit(c1->lastpos, j))
dm_bit_union(n->followpos,
- n->followpos, c2->firstpos);
- }
+ n->followpos, c2->firstpos);
}
break;
case PLUS:
case STAR:
- for (j = 0; j < m->num_nodes; j++) {
- if (dm_bit(rx->lastpos, j)) {
- struct rx_node *n = m->nodes[j];
+ for (j = 0; j < m->num_charsets; j++) {
+ struct rx_node *n = m->charsets[j];
+ if (dm_bit(rx->lastpos, j))
dm_bit_union(n->followpos,
- n->followpos, rx->firstpos);
- }
+ n->followpos, rx->firstpos);
}
break;
}
@@ -189,7 +219,7 @@ static struct state_queue *_create_state_queue(struct dm_pool *mem,
static int _calc_states(struct dm_regex *m, struct rx_node *rx)
{
- unsigned iwidth = (m->num_nodes / DM_BITS_PER_INT) + 1;
+ unsigned iwidth = (m->num_charsets / DM_BITS_PER_INT) + 1;
struct ttree *tt = ttree_create(m->scratch, iwidth);
struct state_queue *h, *t, *tmp;
struct dfa_state *dfa, *ldfa;
@@ -199,9 +229,26 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx)
if (!tt)
return_0;
- if (!(bs = dm_bitset_create(m->scratch, m->num_nodes)))
+ if (!(bs = dm_bitset_create(m->scratch, m->num_charsets)))
return_0;
+ /* build some char maps */
+ dm_bitset_t charmap[256];
+ for (a = 0; a < 256; a++) {
+ charmap[a] = dm_bitset_create(m->scratch, m->num_charsets);
+ if (!charmap[a])
+ return_0;
+ }
+
+ for (i = 0; i < m->num_nodes; i++) {
+ struct rx_node *n = m->nodes[i];
+ if (n->type == CHARSET) {
+ for (a = dm_bit_get_first(n->charset);
+ a >= 0; a = dm_bit_get_next(n->charset, a))
+ dm_bit_set(charmap[a], n->charset_index);
+ }
+ }
+
/* create first state */
dfa = _create_dfa_state(m->mem);
m->start = dfa;
@@ -209,8 +256,9 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx)
/* prime the queue */
h = t = _create_state_queue(m->scratch, dfa, rx->firstpos);
+ dm_bitset_t dfa_copy = dm_bitset_create(m->scratch, m->num_charsets);
while (h) {
- int elems, j, idx[h->bits[0]];
+ int elems, idx[h->bits[0]];
/* pop state off front of the queue */
dfa = h->s;
@@ -227,18 +275,18 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx)
idx[elems++] = i;
for (a = 0; a < 256; a++) {
+ dm_bit_and(dfa_copy, charmap[a], dfa_bits);
+
/* iterate through all the states in firstpos */
- for (j = 0; j < elems; j++) {
- i = idx[j];
- if (dm_bit(m->nodes[i]->charset, a)) {
- if (a == TARGET_TRANS)
- dfa->final = m->nodes[i]->final;
-
- dm_bit_union(bs, bs,
- m->nodes[i]->followpos);
- set_bits = 1;
- }
- }
+ for (i = dm_bit_get_first(dfa_copy);
+ i >= 0; i = dm_bit_get_next(dfa_copy, i)) {
+ if (a == TARGET_TRANS)
+ dfa->final = m->charsets[i]->final;
+
+ dm_bit_union(bs, bs,
+ m->charsets[i]->followpos);
+ set_bits = 1;
+ }
if (set_bits) {
ldfa = ttree_lookup(tt, bs + 1);
@@ -314,11 +362,16 @@ struct dm_regex *dm_regex_create(struct dm_pool *mem, const char **patterns,
m->mem = mem;
m->scratch = scratch;
m->num_nodes = _count_nodes(rx);
+ m->num_charsets = _count_charsets(rx);
+ _enumerate_charsets(rx);
m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes);
-
if (!m->nodes)
goto_bad;
+ m->charsets = dm_pool_alloc(scratch, sizeof(*m->charsets) * m->num_charsets);
+ if (!m->charsets)
+ goto_bad;
+
_fill_table(m, rx);
_create_bitsets(m);
_calc_functions(m);
diff --git a/libdm/regex/parse_rx.c b/libdm/regex/parse_rx.c
index 20f8c30e2..56260f54e 100644
--- a/libdm/regex/parse_rx.c
+++ b/libdm/regex/parse_rx.c
@@ -284,7 +284,7 @@ static struct rx_node *_node(struct dm_pool *mem, int type,
struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
if (n) {
- if (!(n->charset = dm_bitset_create(mem, 256))) {
+ if (type == CHARSET && !(n->charset = dm_bitset_create(mem, 256))) {
dm_pool_free(mem, n);
return NULL;
}
diff --git a/libdm/regex/parse_rx.h b/libdm/regex/parse_rx.h
index 4a5d8b94d..37d7ced8a 100644
--- a/libdm/regex/parse_rx.h
+++ b/libdm/regex/parse_rx.h
@@ -41,6 +41,7 @@ struct rx_node {
struct rx_node *left, *right;
/* used to build the dfa for the toker */
+ unsigned charset_index;
int nullable, final;
dm_bitset_t firstpos;
dm_bitset_t lastpos;