summaryrefslogtreecommitdiff
path: root/mysys/trie.c
diff options
context:
space:
mode:
authorunknown <svoj@mysql.com>2005-06-07 21:17:09 +0500
committerunknown <svoj@mysql.com>2005-06-07 21:17:09 +0500
commite4c09a779df1ed7134c9798dee581ec424d27562 (patch)
treee078b53cf67c7821801e74c6458320652a49a9e5 /mysys/trie.c
parent0de81f007f688d5debf69209f264d343dc30f0c8 (diff)
downloadmariadb-git-e4c09a779df1ed7134c9798dee581ec424d27562.tar.gz
WL#2466 - Fulltext: "always-index" words
Trie and Aho-Corasick code added to distribution. include/Makefile.am: my_trie.h added to noinst_HEADERS. mysys/Makefile.am: trie.c added to libmysys_a_SOURCES.
Diffstat (limited to 'mysys/trie.c')
-rw-r--r--mysys/trie.c237
1 files changed, 237 insertions, 0 deletions
diff --git a/mysys/trie.c b/mysys/trie.c
new file mode 100644
index 00000000000..1f638f8f732
--- /dev/null
+++ b/mysys/trie.c
@@ -0,0 +1,237 @@
+/* Copyright (C) 2005 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Implementation of trie and Aho-Corasick automaton.
+ Supports only charsets that can be compared byte-wise.
+
+ TODO:
+ Add character frequencies. Can increase lookup speed
+ up to 30%.
+ Implement character-wise comparision.
+*/
+
+
+#include "mysys_priv.h"
+#include <m_string.h>
+#include <my_trie.h>
+#include <my_base.h>
+
+
+/*
+ SYNOPSIS
+ TRIE *trie_init (TRIE *trie, CHARSET_INFO *charset);
+
+ DESCRIPTION
+ Allocates or initializes a `TRIE' object. If `trie' is a `NULL'
+ pointer, the function allocates, initializes, and returns a new
+ object. Otherwise, the object is initialized and the address of
+ the object is returned. If `trie_init()' allocates a new object,
+ it will be freed when `trie_free()' is called.
+
+ RETURN VALUE
+ An initialized `TRIE*' object. `NULL' if there was insufficient
+ memory to allocate a new object.
+*/
+
+TRIE *trie_init (TRIE *trie, CHARSET_INFO *charset)
+{
+ MEM_ROOT mem_root;
+ DBUG_ENTER("trie_init");
+ DBUG_ASSERT(charset);
+ init_alloc_root(&mem_root,
+ (sizeof(TRIE_NODE) * 128) + ALLOC_ROOT_MIN_BLOCK_SIZE,
+ sizeof(TRIE_NODE) * 128);
+ if (! trie)
+ {
+ if (! (trie= (TRIE *)alloc_root(&mem_root, sizeof(TRIE))))
+ {
+ free_root(&mem_root, MYF(0));
+ DBUG_RETURN(NULL);
+ }
+ }
+
+ memcpy(&trie->mem_root, &mem_root, sizeof(MEM_ROOT));
+ trie->root.leaf= 0;
+ trie->root.c= 0;
+ trie->root.next= NULL;
+ trie->root.links= NULL;
+ trie->root.fail= NULL;
+ trie->charset= charset;
+ trie->nnodes= 0;
+ trie->nwords= 0;
+ DBUG_RETURN(trie);
+}
+
+
+/*
+ SYNOPSIS
+ void trie_free (TRIE *trie);
+ trie - valid pointer to `TRIE'
+
+ DESCRIPTION
+ Frees the memory allocated for a `trie'.
+
+ RETURN VALUE
+ None.
+*/
+
+void trie_free (TRIE *trie)
+{
+ MEM_ROOT mem_root;
+ DBUG_ENTER("trie_free");
+ DBUG_ASSERT(trie);
+ memcpy(&mem_root, &trie->mem_root, sizeof(MEM_ROOT));
+ free_root(&mem_root, MYF(0));
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ SYNOPSIS
+ my_bool trie_insert (TRIE *trie, const byte *key, uint keylen);
+ trie - valid pointer to `TRIE'
+ key - valid pointer to key to insert
+ keylen - non-0 key length
+
+ DESCRIPTION
+ Inserts new key into trie.
+
+ RETURN VALUE
+ Upon successful completion, `trie_insert' returns `FALSE'. Otherwise
+ `TRUE' is returned.
+
+ NOTES
+ If this function fails you must assume `trie' is broken.
+ However it can be freed with trie_free().
+*/
+
+my_bool trie_insert (TRIE *trie, const byte *key, uint keylen)
+{
+ TRIE_NODE *node;
+ TRIE_NODE *next;
+ byte p;
+ uint k;
+ DBUG_ENTER("trie_insert");
+ DBUG_ASSERT(trie && key && keylen);
+ node= &trie->root;
+ trie->root.fail= NULL;
+ for (k= 0; k < keylen; k++)
+ {
+ p= key[k];
+ for (next= node->links; next; next= next->next)
+ if (next->c == p)
+ break;
+
+ if (! next)
+ {
+ TRIE_NODE *tmp= (TRIE_NODE *)alloc_root(&trie->mem_root,
+ sizeof(TRIE_NODE));
+ if (! tmp)
+ DBUG_RETURN(TRUE);
+ tmp->leaf= 0;
+ tmp->c= p;
+ tmp->links= tmp->fail= tmp->next= NULL;
+ trie->nnodes++;
+ if (! node->links)
+ {
+ node->links= tmp;
+ }
+ else
+ {
+ for (next= node->links; next->next; next= next->next) /* no-op */;
+ next->next= tmp;
+ }
+ node= tmp;
+ }
+ else
+ {
+ node= next;
+ }
+ }
+ node->leaf= keylen;
+ trie->nwords++;
+ DBUG_RETURN(FALSE);
+}
+
+
+/*
+ SYNOPSIS
+ my_bool trie_prepare (TRIE *trie);
+ trie - valid pointer to `TRIE'
+
+ DESCRIPTION
+ Constructs Aho-Corasick automaton.
+
+ RETURN VALUE
+ Upon successful completion, `trie_prepare' returns `FALSE'. Otherwise
+ `TRUE' is returned.
+*/
+
+my_bool ac_trie_prepare (TRIE *trie)
+{
+ TRIE_NODE **tmp_nodes;
+ TRIE_NODE *node;
+ uint32 fnode= 0;
+ uint32 lnode= 0;
+ DBUG_ENTER("trie_prepare");
+ DBUG_ASSERT(trie);
+
+ tmp_nodes= (TRIE_NODE **)my_malloc(trie->nnodes * sizeof(TRIE_NODE *), MYF(0));
+ if (! tmp_nodes)
+ DBUG_RETURN(TRUE);
+
+ trie->root.fail= &trie->root;
+ for (node= trie->root.links; node; node= node->next)
+ {
+ node->fail= &trie->root;
+ tmp_nodes[lnode++]= node;
+ }
+
+ while (fnode < lnode)
+ {
+ TRIE_NODE *current= (TRIE_NODE *)tmp_nodes[fnode++];
+ for (node= current->links; node; node= node->next)
+ {
+ TRIE_NODE *fail= current->fail;
+ tmp_nodes[lnode++]= node;
+ while (! (node->fail= trie_goto(&trie->root, fail, node->c)))
+ fail= fail->fail;
+ }
+ }
+ my_free((gptr)tmp_nodes, MYF(0));
+ DBUG_RETURN(FALSE);
+}
+
+
+/*
+ SYNOPSIS
+ void ac_trie_init (TRIE *trie, AC_TRIE_STATE *state);
+ trie - valid pointer to `TRIE'
+ state - value pointer to `AC_TRIE_STATE'
+
+ DESCRIPTION
+ Initializes `AC_TRIE_STATE' object.
+*/
+
+void ac_trie_init (TRIE *trie, AC_TRIE_STATE *state)
+{
+ DBUG_ENTER("ac_trie_init");
+ DBUG_ASSERT(trie && state);
+ state->trie= trie;
+ state->node= &trie->root;
+ DBUG_VOID_RETURN;
+}