summaryrefslogtreecommitdiff
path: root/device_mapper/regex
diff options
context:
space:
mode:
authorJoe Thornber <ejt@redhat.com>2018-05-14 12:16:43 +0100
committerJoe Thornber <ejt@redhat.com>2018-05-16 13:00:50 +0100
commitccc35e2647b3b78f2fb0d62c25f21fcccbf58950 (patch)
treeb0fc56a0f74b6c6b17c0eeaffa00a921d704b8ec /device_mapper/regex
parent7f97c7ea9ade36bab817aca9eeee5c5177550127 (diff)
downloadlvm2-ccc35e2647b3b78f2fb0d62c25f21fcccbf58950.tar.gz
device-mapper: Fork libdm internally.
The device-mapper directory now holds a copy of libdm source. At the moment this code is identical to libdm. Over time code will migrate out to appropriate places (see doc/refactoring.txt). The libdm directory still exists, and contains the source for the libdevmapper shared library, which we will continue to ship (though not neccessarily update). All code using libdm should now use the version in device-mapper.
Diffstat (limited to 'device_mapper/regex')
-rw-r--r--device_mapper/regex/matcher.c575
-rw-r--r--device_mapper/regex/parse_rx.c667
-rw-r--r--device_mapper/regex/parse_rx.h55
-rw-r--r--device_mapper/regex/ttree.c114
-rw-r--r--device_mapper/regex/ttree.h26
5 files changed, 1437 insertions, 0 deletions
diff --git a/device_mapper/regex/matcher.c b/device_mapper/regex/matcher.c
new file mode 100644
index 000000000..375c1abdc
--- /dev/null
+++ b/device_mapper/regex/matcher.c
@@ -0,0 +1,575 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "parse_rx.h"
+#include "ttree.h"
+#include "assert.h"
+
+struct dfa_state {
+ struct dfa_state *next;
+ int final;
+ dm_bitset_t bits;
+ struct dfa_state *lookup[256];
+};
+
+struct dm_regex { /* Instance variables for the lexer */
+ struct dfa_state *start;
+ unsigned num_nodes;
+ unsigned num_charsets;
+ int nodes_entered;
+ struct rx_node **nodes;
+ int charsets_entered;
+ struct rx_node **charsets;
+ struct dm_pool *scratch, *mem;
+
+ /* stuff for on the fly dfa calculation */
+ dm_bitset_t charmap[256];
+ dm_bitset_t dfa_copy;
+ struct ttree *tt;
+ dm_bitset_t bs;
+ struct dfa_state *h, *t;
+};
+
+static int _count_nodes(struct rx_node *rx)
+{
+ int r = 1;
+
+ if (rx->left)
+ r += _count_nodes(rx->left);
+
+ if (rx->right)
+ r += _count_nodes(rx->right);
+
+ return r;
+}
+
+static unsigned _count_charsets(struct rx_node *rx)
+{
+ if (rx->type == CHARSET)
+ return 1;
+
+ return (rx->left ? _count_charsets(rx->left) : 0) +
+ (rx->right ? _count_charsets(rx->right) : 0);
+}
+
+static void _enumerate_charsets_internal(struct rx_node *rx, unsigned *i)
+{
+ if (rx->type == CHARSET)
+ rx->charset_index = (*i)++;
+ else {
+ if (rx->left)
+ _enumerate_charsets_internal(rx->left, i);
+ if (rx->right)
+ _enumerate_charsets_internal(rx->right, i);
+ }
+}
+
+static void _enumerate_charsets(struct rx_node *rx)
+{
+ unsigned i = 0;
+ _enumerate_charsets_internal(rx, &i);
+}
+
+static void _fill_table(struct dm_regex *m, struct rx_node *rx)
+{
+ assert((rx->type != OR) || (rx->left && rx->right));
+
+ if (rx->left)
+ _fill_table(m, rx->left);
+
+ if (rx->right)
+ _fill_table(m, rx->right);
+
+ m->nodes[m->nodes_entered++] = rx;
+ if (rx->type == CHARSET)
+ m->charsets[m->charsets_entered++] = rx;
+}
+
+static int _create_bitsets(struct dm_regex *m)
+{
+ unsigned i;
+ struct rx_node *n;
+
+ for (i = 0; i < m->num_nodes; i++) {
+ n = m->nodes[i];
+ if (!(n->firstpos = dm_bitset_create(m->scratch, m->num_charsets)))
+ return_0;
+ if (!(n->lastpos = dm_bitset_create(m->scratch, m->num_charsets)))
+ return_0;
+ if (!(n->followpos = dm_bitset_create(m->scratch, m->num_charsets)))
+ return_0;
+ }
+
+ return 1;
+}
+
+static void _calc_functions(struct dm_regex *m)
+{
+ unsigned i, j, final = 1;
+ struct rx_node *rx, *c1, *c2;
+
+ for (i = 0; i < m->num_nodes; i++) {
+ rx = m->nodes[i];
+ c1 = rx->left;
+ c2 = rx->right;
+
+ if (rx->type == CHARSET && dm_bit(rx->charset, TARGET_TRANS))
+ rx->final = final++;
+
+ switch (rx->type) {
+ case CAT:
+ if (c1->nullable)
+ dm_bit_union(rx->firstpos,
+ c1->firstpos, c2->firstpos);
+ else
+ dm_bit_copy(rx->firstpos, c1->firstpos);
+
+ if (c2->nullable)
+ dm_bit_union(rx->lastpos,
+ c1->lastpos, c2->lastpos);
+ else
+ dm_bit_copy(rx->lastpos, c2->lastpos);
+
+ rx->nullable = c1->nullable && c2->nullable;
+ break;
+
+ case PLUS:
+ dm_bit_copy(rx->firstpos, c1->firstpos);
+ dm_bit_copy(rx->lastpos, c1->lastpos);
+ rx->nullable = c1->nullable;
+ break;
+
+ case OR:
+ dm_bit_union(rx->firstpos, c1->firstpos, c2->firstpos);
+ dm_bit_union(rx->lastpos, c1->lastpos, c2->lastpos);
+ rx->nullable = c1->nullable || c2->nullable;
+ break;
+
+ case QUEST:
+ case STAR:
+ dm_bit_copy(rx->firstpos, c1->firstpos);
+ dm_bit_copy(rx->lastpos, c1->lastpos);
+ rx->nullable = 1;
+ break;
+
+ case CHARSET:
+ dm_bit_set(rx->firstpos, rx->charset_index);
+ dm_bit_set(rx->lastpos, rx->charset_index);
+ rx->nullable = 0;
+ break;
+
+ default:
+ log_error(INTERNAL_ERROR "Unknown calc node type");
+ }
+
+ /*
+ * followpos has it's own switch
+ * because PLUS and STAR do the
+ * same thing.
+ */
+ switch (rx->type) {
+ case CAT:
+ for (j = 0; j < m->num_charsets; j++) {
+ struct rx_node *n = m->charsets[j];
+ if (dm_bit(c1->lastpos, j))
+ dm_bit_union(n->followpos,
+ n->followpos, c2->firstpos);
+ }
+ break;
+
+ case PLUS:
+ case STAR:
+ for (j = 0; j < m->num_charsets; j++) {
+ struct rx_node *n = m->charsets[j];
+ if (dm_bit(rx->lastpos, j))
+ dm_bit_union(n->followpos,
+ n->followpos, rx->firstpos);
+ }
+ break;
+ }
+ }
+}
+
+static struct dfa_state *_create_dfa_state(struct dm_pool *mem)
+{
+ return dm_pool_zalloc(mem, sizeof(struct dfa_state));
+}
+
+static struct dfa_state *_create_state_queue(struct dm_pool *mem,
+ struct dfa_state *dfa,
+ dm_bitset_t bits)
+{
+ if (!(dfa->bits = dm_bitset_create(mem, bits[0]))) /* first element is the size */
+ return_NULL;
+
+ dm_bit_copy(dfa->bits, bits);
+ dfa->next = 0;
+ dfa->final = -1;
+
+ return dfa;
+}
+
+static int _calc_state(struct dm_regex *m, struct dfa_state *dfa, int a)
+{
+ int set_bits = 0, i;
+ dm_bitset_t dfa_bits = dfa->bits;
+ dm_bit_and(m->dfa_copy, m->charmap[a], dfa_bits);
+
+ /* iterate through all the states in firstpos */
+ for (i = dm_bit_get_first(m->dfa_copy); i >= 0; i = dm_bit_get_next(m->dfa_copy, i)) {
+ if (a == TARGET_TRANS)
+ dfa->final = m->charsets[i]->final;
+
+ dm_bit_union(m->bs, m->bs, m->charsets[i]->followpos);
+ set_bits = 1;
+ }
+
+ if (set_bits) {
+ struct dfa_state *tmp;
+ struct dfa_state *ldfa = ttree_lookup(m->tt, m->bs + 1);
+ if (!ldfa) {
+ /* push */
+ if (!(ldfa = _create_dfa_state(m->mem)))
+ return_0;
+
+ ttree_insert(m->tt, m->bs + 1, ldfa);
+ if (!(tmp = _create_state_queue(m->scratch, ldfa, m->bs)))
+ return_0;
+ if (!m->h)
+ m->h = m->t = tmp;
+ else {
+ m->t->next = tmp;
+ m->t = tmp;
+ }
+ }
+
+ dfa->lookup[a] = ldfa;
+ dm_bit_clear_all(m->bs);
+ }
+
+ return 1;
+}
+
+static int _calc_states(struct dm_regex *m, struct rx_node *rx)
+{
+ unsigned iwidth = (m->num_charsets / DM_BITS_PER_INT) + 1;
+ struct dfa_state *dfa;
+ struct rx_node *n;
+ unsigned i;
+ int a;
+
+ if (!(m->tt = ttree_create(m->scratch, iwidth)))
+ return_0;
+
+ if (!(m->bs = dm_bitset_create(m->scratch, m->num_charsets)))
+ return_0;
+
+ /* build some char maps */
+ for (a = 0; a < 256; a++)
+ if (!(m->charmap[a] = dm_bitset_create(m->scratch, m->num_charsets)))
+ return_0;
+
+ for (i = 0; i < m->num_nodes; i++) {
+ n = m->nodes[i];
+ if (n->type == CHARSET) {
+ for (a = dm_bit_get_first(n->charset);
+ a >= 0; a = dm_bit_get_next(n->charset, a))
+ dm_bit_set(m->charmap[a], n->charset_index);
+ }
+ }
+
+ /* create first state */
+ if (!(dfa = _create_dfa_state(m->mem)))
+ return_0;
+
+ m->start = dfa;
+ ttree_insert(m->tt, rx->firstpos + 1, dfa);
+
+ /* prime the queue */
+ if (!(m->h = m->t = _create_state_queue(m->scratch, dfa, rx->firstpos)))
+ return_0;
+
+ if (!(m->dfa_copy = dm_bitset_create(m->scratch, m->num_charsets)))
+ return_0;
+
+ return 1;
+}
+
+/*
+ * Forces all the dfa states to be calculated up front, ie. what
+ * _calc_states() used to do before we switched to calculating on demand.
+ */
+static int _force_states(struct dm_regex *m)
+{
+ int a;
+
+ /* keep processing until there's nothing in the queue */
+ struct dfa_state *s;
+ while ((s = m->h)) {
+ /* pop state off front of the queue */
+ m->h = m->h->next;
+
+ /* iterate through all the inputs for this state */
+ dm_bit_clear_all(m->bs);
+ for (a = 0; a < 256; a++)
+ if (!_calc_state(m, s, a))
+ return_0;
+ }
+
+ return 1;
+}
+
+struct dm_regex *dm_regex_create(struct dm_pool *mem, const char * const *patterns,
+ unsigned num_patterns)
+{
+ char *all, *ptr;
+ unsigned i;
+ size_t len = 0;
+ struct rx_node *rx;
+ struct dm_regex *m;
+ struct dm_pool *scratch = mem;
+
+ if (!(m = dm_pool_zalloc(mem, sizeof(*m))))
+ return_NULL;
+
+ /* join the regexps together, delimiting with zero */
+ for (i = 0; i < num_patterns; i++)
+ len += strlen(patterns[i]) + 8;
+
+ ptr = all = dm_pool_alloc(scratch, len + 1);
+
+ if (!all)
+ goto_bad;
+
+ for (i = 0; i < num_patterns; i++) {
+ ptr += sprintf(ptr, "(.*(%s)%c)", patterns[i], TARGET_TRANS);
+ if (i < (num_patterns - 1))
+ *ptr++ = '|';
+ }
+
+ /* parse this expression */
+ if (!(rx = rx_parse_tok(scratch, all, ptr))) {
+ log_error("Couldn't parse regex");
+ goto bad;
+ }
+
+ m->mem = mem;
+ m->scratch = scratch;
+ m->num_nodes = _count_nodes(rx);
+ m->num_charsets = _count_charsets(rx);
+ _enumerate_charsets(rx);
+ if (!(m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes)))
+ goto_bad;
+
+ if (!(m->charsets = dm_pool_alloc(scratch, sizeof(*m->charsets) * m->num_charsets)))
+ goto_bad;
+
+ _fill_table(m, rx);
+
+ if (!_create_bitsets(m))
+ goto_bad;
+
+ _calc_functions(m);
+
+ if (!_calc_states(m, rx))
+ goto_bad;
+
+ return m;
+
+ bad:
+ dm_pool_free(mem, m);
+
+ return NULL;
+}
+
+static struct dfa_state *_step_matcher(struct dm_regex *m, int c, struct dfa_state *cs, int *r)
+{
+ struct dfa_state *ns;
+
+ if (!(ns = cs->lookup[(unsigned char) c])) {
+ if (!_calc_state(m, cs, (unsigned char) c))
+ return_NULL;
+
+ if (!(ns = cs->lookup[(unsigned char) c]))
+ return NULL;
+ }
+
+ // yuck, we have to special case the target trans
+ if ((ns->final == -1) &&
+ !_calc_state(m, ns, TARGET_TRANS))
+ return_NULL;
+
+ if (ns->final && (ns->final > *r))
+ *r = ns->final;
+
+ return ns;
+}
+
+int dm_regex_match(struct dm_regex *regex, const char *s)
+{
+ struct dfa_state *cs = regex->start;
+ int r = 0;
+
+ dm_bit_clear_all(regex->bs);
+ if (!(cs = _step_matcher(regex, HAT_CHAR, cs, &r)))
+ goto out;
+
+ for (; *s; s++)
+ if (!(cs = _step_matcher(regex, *s, cs, &r)))
+ goto out;
+
+ _step_matcher(regex, DOLLAR_CHAR, cs, &r);
+
+ out:
+ /* subtract 1 to get back to zero index */
+ return r - 1;
+}
+
+/*
+ * The next block of code concerns calculating a fingerprint for the dfa.
+ *
+ * We're not calculating a minimal dfa in _calculate_state (maybe a future
+ * improvement). As such it's possible that two non-isomorphic dfas
+ * recognise the same language. This can only really happen if you start
+ * with equivalent, but different regexes (for example the simplifier in
+ * parse_rx.c may have changed).
+ *
+ * The code is inefficient; repeatedly searching a singly linked list for
+ * previously seen nodes. Not worried since this is test code.
+ */
+struct node_list {
+ unsigned node_id;
+ struct dfa_state *node;
+ struct node_list *next;
+};
+
+struct printer {
+ struct dm_pool *mem;
+ struct node_list *pending;
+ struct node_list *processed;
+ unsigned next_index;
+};
+
+static uint32_t _randomise(uint32_t n)
+{
+ /* 2^32 - 5 */
+ uint32_t const prime = (~0) - 4;
+ return n * prime;
+}
+
+static int _seen(struct node_list *n, struct dfa_state *node, uint32_t *i)
+{
+ while (n) {
+ if (n->node == node) {
+ *i = n->node_id;
+ return 1;
+ }
+ n = n->next;
+ }
+
+ return 0;
+}
+
+/*
+ * Push node if it's not been seen before, returning a unique index.
+ */
+static uint32_t _push_node(struct printer *p, struct dfa_state *node)
+{
+ uint32_t i;
+ struct node_list *n;
+
+ if (_seen(p->pending, node, &i) ||
+ _seen(p->processed, node, &i))
+ return i;
+
+ if (!(n = dm_pool_alloc(p->mem, sizeof(*n))))
+ return_0;
+
+ n->node_id = ++p->next_index; /* start from 1, keep 0 as error code */
+ n->node = node;
+ n->next = p->pending;
+ p->pending = n;
+
+ return n->node_id;
+}
+
+/*
+ * Pop the front node, and fill out it's previously assigned index.
+ */
+static struct dfa_state *_pop_node(struct printer *p)
+{
+ struct dfa_state *node = NULL;
+ struct node_list *n;
+
+ if (p->pending) {
+ n = p->pending;
+ p->pending = n->next;
+ n->next = p->processed;
+ p->processed = n;
+
+ node = n->node;
+ }
+
+ return node;
+}
+
+static uint32_t _combine(uint32_t n1, uint32_t n2)
+{
+ return ((n1 << 8) | (n1 >> 24)) ^ _randomise(n2);
+}
+
+static uint32_t _fingerprint(struct printer *p)
+{
+ int c;
+ uint32_t result = 0;
+ struct dfa_state *node;
+
+ while ((node = _pop_node(p))) {
+ result = _combine(result, (node->final < 0) ? 0 : node->final);
+ for (c = 0; c < 256; c++)
+ result = _combine(result,
+ _push_node(p, node->lookup[c]));
+ }
+
+ return result;
+}
+
+uint32_t dm_regex_fingerprint(struct dm_regex *regex)
+{
+ struct printer p;
+ uint32_t result = 0;
+ struct dm_pool *mem = dm_pool_create("regex fingerprint", 1024);
+
+ if (!mem)
+ return_0;
+
+ if (!_force_states(regex))
+ goto_out;
+
+ p.mem = mem;
+ p.pending = NULL;
+ p.processed = NULL;
+ p.next_index = 0;
+
+ if (!_push_node(&p, regex->start))
+ goto_out;
+
+ result = _fingerprint(&p);
+out:
+ dm_pool_destroy(mem);
+
+ return result;
+}
diff --git a/device_mapper/regex/parse_rx.c b/device_mapper/regex/parse_rx.c
new file mode 100644
index 000000000..cc83bfe35
--- /dev/null
+++ b/device_mapper/regex/parse_rx.c
@@ -0,0 +1,667 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "parse_rx.h"
+
+#ifdef DEBUG
+#include <ctype.h>
+
+__attribute__ ((__unused__))
+static void _regex_print(struct rx_node *rx, int depth, unsigned show_nodes)
+{
+ int i, numchars;
+
+ if (rx->left) {
+ if (rx->left->type != CHARSET && (show_nodes || (!((rx->type == CAT || rx->type == OR) && rx->left->type == CAT))))
+ printf("(");
+
+ _regex_print(rx->left, depth + 1, show_nodes);
+
+ if (rx->left->type != CHARSET && (show_nodes || (!((rx->type == CAT || rx->type == OR) && rx->left->type == CAT))))
+ printf(")");
+ }
+
+ /* display info about the node */
+ switch (rx->type) {
+ case CAT:
+ break;
+
+ case OR:
+ printf("|");
+ break;
+
+ case STAR:
+ printf("*");
+ break;
+
+ case PLUS:
+ printf("+");
+ break;
+
+ case QUEST:
+ printf("?");
+ break;
+
+ case CHARSET:
+ numchars = 0;
+ for (i = 0; i < 256; i++)
+ if (dm_bit(rx->charset, i) && (isprint(i) || i == HAT_CHAR || i == DOLLAR_CHAR))
+ numchars++;
+ if (numchars == 97) {
+ printf(".");
+ break;
+ }
+ if (numchars > 1)
+ printf("[");
+ for (i = 0; i < 256; i++)
+ if (dm_bit(rx->charset, i)) {
+ if (isprint(i))
+ printf("%c", (char) i);
+ else if (i == HAT_CHAR)
+ printf("^");
+ else if (i == DOLLAR_CHAR)
+ printf("$");
+ }
+ if (numchars > 1)
+ printf("]");
+ break;
+
+ default:
+ fprintf(stderr, "Unknown type");
+ }
+
+ if (rx->right) {
+ if (rx->right->type != CHARSET && (show_nodes || (!(rx->type == CAT && rx->right->type == CAT) && rx->right->right)))
+ printf("(");
+ _regex_print(rx->right, depth + 1, show_nodes);
+ if (rx->right->type != CHARSET && (show_nodes || (!(rx->type == CAT && rx->right->type == CAT) && rx->right->right)))
+ printf(")");
+ }
+
+ if (!depth)
+ printf("\n");
+}
+#endif /* DEBUG */
+
+struct parse_sp { /* scratch pad for the parsing process */
+ struct dm_pool *mem;
+ int type; /* token type, 0 indicates a charset */
+ dm_bitset_t charset; /* The current charset */
+ const char *cursor; /* where we are in the regex */
+ const char *rx_end; /* 1pte for the expression being parsed */
+};
+
+static struct rx_node *_or_term(struct parse_sp *ps);
+
+static void _single_char(struct parse_sp *ps, unsigned int c, const char *ptr)
+{
+ ps->type = 0;
+ ps->cursor = ptr + 1;
+ dm_bit_clear_all(ps->charset);
+ dm_bit_set(ps->charset, c);
+}
+
+/*
+ * Get the next token from the regular expression.
+ * Returns: 1 success, 0 end of input, -1 error.
+ */
+static int _rx_get_token(struct parse_sp *ps)
+{
+ int neg = 0, range = 0;
+ char c, lc = 0;
+ const char *ptr = ps->cursor;
+ if (ptr == ps->rx_end) { /* end of input ? */
+ ps->type = -1;
+ return 0;
+ }
+
+ switch (*ptr) {
+ /* charsets and ncharsets */
+ case '[':
+ ptr++;
+ if (*ptr == '^') {
+ dm_bit_set_all(ps->charset);
+
+ /* never transition on zero */
+ dm_bit_clear(ps->charset, 0);
+ neg = 1;
+ ptr++;
+
+ } else
+ dm_bit_clear_all(ps->charset);
+
+ while ((ptr < ps->rx_end) && (*ptr != ']')) {
+ if (*ptr == '\\') {
+ /* an escaped character */
+ ptr++;
+ switch (*ptr) {
+ case 'n':
+ c = '\n';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 't':
+ c = '\t';
+ break;
+ default:
+ c = *ptr;
+ }
+ } else if (*ptr == '-' && lc) {
+ /* we've got a range on our hands */
+ range = 1;
+ ptr++;
+ if (ptr == ps->rx_end) {
+ log_error("Incomplete range"
+ "specification");
+ return -1;
+ }
+ c = *ptr;
+ } else
+ c = *ptr;
+
+ if (range) {
+ /* add lc - c into the bitset */
+ if (lc > c) {
+ char tmp = c;
+ c = lc;
+ lc = tmp;
+ }
+
+ for (; lc <= c; lc++) {
+ if (neg)
+ dm_bit_clear(ps->charset, lc);
+ else
+ dm_bit_set(ps->charset, lc);
+ }
+ range = 0;
+ } else {
+ /* add c into the bitset */
+ if (neg)
+ dm_bit_clear(ps->charset, c);
+ else
+ dm_bit_set(ps->charset, c);
+ }
+ ptr++;
+ lc = c;
+ }
+
+ if (ptr >= ps->rx_end) {
+ ps->type = -1;
+ return -1;
+ }
+
+ ps->type = 0;
+ ps->cursor = ptr + 1;
+ break;
+
+ /* These characters are special, we just return their ASCII
+ codes as the type. Sorted into ascending order to help the
+ compiler */
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case '?':
+ case '|':
+ ps->type = (int) *ptr;
+ ps->cursor = ptr + 1;
+ break;
+
+ case '^':
+ _single_char(ps, HAT_CHAR, ptr);
+ break;
+
+ case '$':
+ _single_char(ps, DOLLAR_CHAR, ptr);
+ break;
+
+ case '.':
+ /* The 'all but newline' character set */
+ ps->type = 0;
+ ps->cursor = ptr + 1;
+ dm_bit_set_all(ps->charset);
+ dm_bit_clear(ps->charset, (int) '\n');
+ dm_bit_clear(ps->charset, (int) '\r');
+ dm_bit_clear(ps->charset, 0);
+ break;
+
+ case '\\':
+ /* escaped character */
+ ptr++;
+ if (ptr >= ps->rx_end) {
+ log_error("Badly quoted character at end "
+ "of expression");
+ ps->type = -1;
+ return -1;
+ }
+
+ ps->type = 0;
+ ps->cursor = ptr + 1;
+ dm_bit_clear_all(ps->charset);
+ switch (*ptr) {
+ case 'n':
+ dm_bit_set(ps->charset, (int) '\n');
+ break;
+ case 'r':
+ dm_bit_set(ps->charset, (int) '\r');
+ break;
+ case 't':
+ dm_bit_set(ps->charset, (int) '\t');
+ break;
+ default:
+ dm_bit_set(ps->charset, (int) *ptr);
+ }
+ break;
+
+ default:
+ /* add a single character to the bitset */
+ ps->type = 0;
+ ps->cursor = ptr + 1;
+ dm_bit_clear_all(ps->charset);
+ dm_bit_set(ps->charset, (int) (unsigned char) *ptr);
+ break;
+ }
+
+ return 1;
+}
+
+static struct rx_node *_node(struct dm_pool *mem, int type,
+ struct rx_node *l, struct rx_node *r)
+{
+ struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
+
+ if (n) {
+ if (type == CHARSET && !(n->charset = dm_bitset_create(mem, 256))) {
+ dm_pool_free(mem, n);
+ return NULL;
+ }
+
+ n->type = type;
+ n->left = l;
+ n->right = r;
+ }
+
+ return n;
+}
+
+static struct rx_node *_term(struct parse_sp *ps)
+{
+ struct rx_node *n;
+
+ switch (ps->type) {
+ case 0:
+ if (!(n = _node(ps->mem, CHARSET, NULL, NULL)))
+ return_NULL;
+
+ dm_bit_copy(n->charset, ps->charset);
+ _rx_get_token(ps); /* match charset */
+ break;
+
+ case '(':
+ _rx_get_token(ps); /* match '(' */
+ n = _or_term(ps);
+ if (ps->type != ')') {
+ log_error("missing ')' in regular expression");
+ return 0;
+ }
+ _rx_get_token(ps); /* match ')' */
+ break;
+
+ default:
+ n = 0;
+ }
+
+ return n;
+}
+
+static struct rx_node *_closure_term(struct parse_sp *ps)
+{
+ struct rx_node *l, *n;
+
+ if (!(l = _term(ps)))
+ return NULL;
+
+ for (;;) {
+ switch (ps->type) {
+ case '*':
+ n = _node(ps->mem, STAR, l, NULL);
+ break;
+
+ case '+':
+ n = _node(ps->mem, PLUS, l, NULL);
+ break;
+
+ case '?':
+ n = _node(ps->mem, QUEST, l, NULL);
+ break;
+
+ default:
+ return l;
+ }
+
+ if (!n)
+ return_NULL;
+
+ _rx_get_token(ps);
+ l = n;
+ }
+
+ return n;
+}
+
+static struct rx_node *_cat_term(struct parse_sp *ps)
+{
+ struct rx_node *l, *r, *n;
+
+ if (!(l = _closure_term(ps)))
+ return NULL;
+
+ if (ps->type == '|')
+ return l;
+
+ if (!(r = _cat_term(ps)))
+ return l;
+
+ if (!(n = _node(ps->mem, CAT, l, r)))
+ stack;
+
+ return n;
+}
+
+static struct rx_node *_or_term(struct parse_sp *ps)
+{
+ struct rx_node *l, *r, *n;
+
+ if (!(l = _cat_term(ps)))
+ return NULL;
+
+ if (ps->type != '|')
+ return l;
+
+ _rx_get_token(ps); /* match '|' */
+
+ if (!(r = _or_term(ps))) {
+ log_error("Badly formed 'or' expression");
+ return NULL;
+ }
+
+ if (!(n = _node(ps->mem, OR, l, r)))
+ stack;
+
+ return n;
+}
+
+/*----------------------------------------------------------------*/
+
+/* Macros for left and right nodes. Inverted if 'leftmost' is set. */
+#define LEFT(a) (leftmost ? (a)->left : (a)->right)
+#define RIGHT(a) (leftmost ? (a)->right : (a)->left)
+
+/*
+ * The optimiser spots common prefixes on either side of an 'or' node, and
+ * lifts them outside the 'or' with a 'cat'.
+ */
+static unsigned _depth(struct rx_node *r, unsigned leftmost)
+{
+ int count = 1;
+
+ while (r->type != CHARSET && LEFT(r) && (leftmost || r->type != OR)) {
+ count++;
+ r = LEFT(r);
+ }
+
+ return count;
+}
+
+/*
+ * FIXME: a unique key could be built up as part of the parse, to make the
+ * comparison quick. Alternatively we could use cons-hashing, and then
+ * this would simply be a pointer comparison.
+ */
+static int _nodes_equal(struct rx_node *l, struct rx_node *r)
+{
+ if (l->type != r->type)
+ return 0;
+
+ switch (l->type) {
+ case CAT:
+ case OR:
+ return _nodes_equal(l->left, r->left) &&
+ _nodes_equal(l->right, r->right);
+
+ case STAR:
+ case PLUS:
+ case QUEST:
+ return _nodes_equal(l->left, r->left);
+
+ case CHARSET:
+ /*
+ * Never change anything containing TARGET_TRANS
+ * used by matcher as boundary marker between concatenated
+ * expressions.
+ */
+ return (!dm_bit(l->charset, TARGET_TRANS) && dm_bitset_equal(l->charset, r->charset));
+ }
+
+ /* NOTREACHED */
+ return_0;
+}
+
+static int _find_leftmost_common(struct rx_node *or,
+ struct rx_node **l,
+ struct rx_node **r,
+ unsigned leftmost)
+{
+ struct rx_node *left = or->left, *right = or->right;
+ unsigned left_depth = _depth(left, leftmost);
+ unsigned right_depth = _depth(right, leftmost);
+
+ while (left_depth > right_depth && left->type != OR) {
+ left = LEFT(left);
+ left_depth--;
+ }
+
+ while (right_depth > left_depth && right->type != OR) {
+ right = LEFT(right);
+ right_depth--;
+ }
+
+ if (left_depth != right_depth)
+ return 0;
+
+ while (left_depth) {
+ if (left->type == CAT && right->type == CAT) {
+ if (_nodes_equal(LEFT(left), LEFT(right))) {
+ *l = left;
+ *r = right;
+ return 1;
+ }
+ }
+ if (left->type == OR || right->type == OR)
+ break;
+ left = LEFT(left);
+ right = LEFT(right);
+ left_depth--;
+ }
+
+ return 0;
+}
+
+/* If top node is OR, rotate (leftmost example) from ((ab)|((ac)|d)) to (((ab)|(ac))|d) */
+static int _rotate_ors(struct rx_node *r, unsigned leftmost)
+{
+ struct rx_node *old_node;
+
+ if (r->type != OR || RIGHT(r)->type != OR)
+ return 0;
+
+ old_node = RIGHT(r);
+
+ if (leftmost) {
+ r->right = RIGHT(old_node);
+ old_node->right = LEFT(old_node);
+ old_node->left = LEFT(r);
+ r->left = old_node;
+ } else {
+ r->left = RIGHT(old_node);
+ old_node->left = LEFT(old_node);
+ old_node->right = LEFT(r);
+ r->right = old_node;
+ }
+
+ return 1;
+}
+
+static struct rx_node *_exchange_nodes(struct dm_pool *mem, struct rx_node *r,
+ struct rx_node *left_cat, struct rx_node *right_cat,
+ unsigned leftmost)
+{
+ struct rx_node *new_r;
+
+ if (leftmost)
+ new_r = _node(mem, CAT, LEFT(left_cat), r);
+ else
+ new_r = _node(mem, CAT, r, LEFT(right_cat));
+
+ if (!new_r)
+ return_NULL;
+
+ memcpy(left_cat, RIGHT(left_cat), sizeof(*left_cat));
+ memcpy(right_cat, RIGHT(right_cat), sizeof(*right_cat));
+
+ return new_r;
+}
+
+static struct rx_node *_pass(struct dm_pool *mem,
+ struct rx_node *r,
+ int *changed)
+{
+ struct rx_node *left, *right;
+
+ /*
+ * walk the tree, optimising every 'or' node.
+ */
+ switch (r->type) {
+ case CAT:
+ if (!(r->left = _pass(mem, r->left, changed)))
+ return_NULL;
+
+ if (!(r->right = _pass(mem, r->right, changed)))
+ return_NULL;
+
+ break;
+
+ case STAR:
+ case PLUS:
+ case QUEST:
+ if (!(r->left = _pass(mem, r->left, changed)))
+ return_NULL;
+
+ break;
+ case OR:
+ /* It's important we optimise sub nodes first */
+ if (!(r->left = _pass(mem, r->left, changed)))
+ return_NULL;
+
+ if (!(r->right = _pass(mem, r->right, changed)))
+ return_NULL;
+ /*
+ * If rotate_ors changes the tree, left and right are stale,
+ * so just set 'changed' to repeat the search.
+ *
+ * FIXME Check we can't 'bounce' between left and right rotations here.
+ */
+ if (_find_leftmost_common(r, &left, &right, 1)) {
+ if (!_rotate_ors(r, 1))
+ r = _exchange_nodes(mem, r, left, right, 1);
+ *changed = 1;
+ } else if (_find_leftmost_common(r, &left, &right, 0)) {
+ if (!_rotate_ors(r, 0))
+ r = _exchange_nodes(mem, r, left, right, 0);
+ *changed = 1;
+ }
+ break;
+
+ case CHARSET:
+ break;
+ }
+
+ return r;
+}
+
+static struct rx_node *_optimise(struct dm_pool *mem, struct rx_node *r)
+{
+ /*
+ * We're looking for (or (... (cat <foo> a)) (... (cat <foo> b)))
+ * and want to turn it into (cat <foo> (or (... a) (... b)))
+ *
+ * (fa)|(fb) becomes f(a|b)
+ */
+
+ /*
+ * Initially done as an inefficient multipass algorithm.
+ */
+ int changed;
+
+ do {
+ changed = 0;
+ r = _pass(mem, r, &changed);
+ } while (r && changed);
+
+ return r;
+}
+
+/*----------------------------------------------------------------*/
+
+struct rx_node *rx_parse_tok(struct dm_pool *mem,
+ const char *begin, const char *end)
+{
+ struct rx_node *r;
+ struct parse_sp *ps = dm_pool_zalloc(mem, sizeof(*ps));
+
+ if (!ps)
+ return_NULL;
+
+ ps->mem = mem;
+ if (!(ps->charset = dm_bitset_create(mem, 256))) {
+ log_error("Regex charset allocation failed");
+ dm_pool_free(mem, ps);
+ return NULL;
+ }
+ ps->cursor = begin;
+ ps->rx_end = end;
+ _rx_get_token(ps); /* load the first token */
+
+ if (!(r = _or_term(ps))) {
+ log_error("Parse error in regex");
+ dm_pool_free(mem, ps);
+ return NULL;
+ }
+
+ if (!(r = _optimise(mem, r))) {
+ log_error("Regex optimisation error");
+ dm_pool_free(mem, ps);
+ return NULL;
+ }
+
+ return r;
+}
+
+struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str)
+{
+ return rx_parse_tok(mem, str, str + strlen(str));
+}
diff --git a/device_mapper/regex/parse_rx.h b/device_mapper/regex/parse_rx.h
new file mode 100644
index 000000000..08970605d
--- /dev/null
+++ b/device_mapper/regex/parse_rx.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _DM_PARSE_REGEX_H
+#define _DM_PARSE_REGEX_H
+
+enum {
+ CAT,
+ STAR,
+ PLUS,
+ OR,
+ QUEST,
+ CHARSET
+};
+
+/*
+ * We're never going to be running the regex on non-printable
+ * chars, so we can use a couple of these chars to represent the
+ * start and end of a string.
+ */
+#define HAT_CHAR 0x2
+#define DOLLAR_CHAR 0x3
+
+#define TARGET_TRANS '\0'
+
+struct rx_node {
+ int type;
+ dm_bitset_t charset;
+ struct rx_node *left, *right;
+
+ /* used to build the dfa for the toker */
+ unsigned charset_index;
+ int nullable, final;
+ dm_bitset_t firstpos;
+ dm_bitset_t lastpos;
+ dm_bitset_t followpos;
+};
+
+struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str);
+struct rx_node *rx_parse_tok(struct dm_pool *mem,
+ const char *begin, const char *end);
+
+#endif
diff --git a/device_mapper/regex/ttree.c b/device_mapper/regex/ttree.c
new file mode 100644
index 000000000..62c5bf786
--- /dev/null
+++ b/device_mapper/regex/ttree.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "ttree.h"
+
+struct node {
+ unsigned k;
+ struct node *l, *m, *r;
+ void *data;
+};
+
+struct ttree {
+ int klen;
+ struct dm_pool *mem;
+ struct node *root;
+};
+
+__attribute__((nonnull(1)))
+static struct node **_lookup_single(struct node **c, unsigned int k)
+{
+ while (*c) {
+ if (k < (*c)->k)
+ c = &((*c)->l);
+
+ else if (k > (*c)->k)
+ c = &((*c)->r);
+
+ else {
+ c = &((*c)->m);
+ break;
+ }
+ }
+
+ return c;
+}
+
+void *ttree_lookup(struct ttree *tt, unsigned *key)
+{
+ struct node **c = &tt->root;
+ int count = tt->klen;
+
+ while (*c && count) {
+ c = _lookup_single(c, *key++);
+ count--;
+ }
+
+ return *c ? (*c)->data : NULL;
+}
+
+static struct node *_tree_node(struct dm_pool *mem, unsigned int k)
+{
+ struct node *n = dm_pool_zalloc(mem, sizeof(*n));
+
+ if (n)
+ n->k = k;
+
+ return n;
+}
+
+int ttree_insert(struct ttree *tt, unsigned int *key, void *data)
+{
+ struct node **c = &tt->root;
+ int count = tt->klen;
+ unsigned int k;
+
+ do {
+ k = *key++;
+ c = _lookup_single(c, k);
+ count--;
+
+ } while (*c && count);
+
+ if (!*c) {
+ count++;
+
+ while (count--) {
+ if (!(*c = _tree_node(tt->mem, k)))
+ return_0;
+
+ if (count) {
+ k = *key++;
+ c = &((*c)->m);
+ }
+ }
+ }
+ (*c)->data = data;
+
+ return 1;
+}
+
+struct ttree *ttree_create(struct dm_pool *mem, unsigned int klen)
+{
+ struct ttree *tt;
+
+ if (!(tt = dm_pool_zalloc(mem, sizeof(*tt))))
+ return_NULL;
+
+ tt->klen = klen;
+ tt->mem = mem;
+ return tt;
+}
diff --git a/device_mapper/regex/ttree.h b/device_mapper/regex/ttree.h
new file mode 100644
index 000000000..8b62181f4
--- /dev/null
+++ b/device_mapper/regex/ttree.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _DM_TTREE_H
+#define _DM_TTREE_H
+
+struct ttree;
+
+struct ttree *ttree_create(struct dm_pool *mem, unsigned int klen);
+
+void *ttree_lookup(struct ttree *tt, unsigned *key);
+int ttree_insert(struct ttree *tt, unsigned *key, void *data);
+
+#endif