summaryrefslogtreecommitdiff
path: root/include/my_trie.h
diff options
context:
space:
mode:
authorunknown <svoj@mysql.com>2005-06-07 21:17:09 +0500
committerunknown <svoj@mysql.com>2005-06-07 21:17:09 +0500
commite4c09a779df1ed7134c9798dee581ec424d27562 (patch)
treee078b53cf67c7821801e74c6458320652a49a9e5 /include/my_trie.h
parent0de81f007f688d5debf69209f264d343dc30f0c8 (diff)
downloadmariadb-git-e4c09a779df1ed7134c9798dee581ec424d27562.tar.gz
WL#2466 - Fulltext: "always-index" words
Trie and Aho-Corasick code added to distribution. include/Makefile.am: my_trie.h added to noinst_HEADERS. mysys/Makefile.am: trie.c added to libmysys_a_SOURCES.
Diffstat (limited to 'include/my_trie.h')
-rw-r--r--include/my_trie.h142
1 files changed, 142 insertions, 0 deletions
diff --git a/include/my_trie.h b/include/my_trie.h
new file mode 100644
index 00000000000..a8534cb11c1
--- /dev/null
+++ b/include/my_trie.h
@@ -0,0 +1,142 @@
+/* Copyright (C) 2005 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _trie_h
+#define _trie_h
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct st_trie_node
+{
+ uint16 leaf; /* Depth from root node if match, 0 else */
+ byte c; /* Label on this edge */
+ struct st_trie_node *next; /* Next label */
+ struct st_trie_node *links; /* Array of edges leaving this node */
+ struct st_trie_node *fail; /* AC failure function */
+} TRIE_NODE;
+
+typedef struct st_trie
+{
+ TRIE_NODE root;
+ MEM_ROOT mem_root;
+ CHARSET_INFO *charset;
+ uint32 nnodes;
+ uint32 nwords;
+} TRIE;
+
+typedef struct st_ac_trie_state
+{
+ TRIE *trie;
+ TRIE_NODE *node;
+} AC_TRIE_STATE;
+
+extern TRIE *trie_init (TRIE *trie, CHARSET_INFO *charset);
+extern void trie_free (TRIE *trie);
+extern my_bool trie_insert (TRIE *trie, const byte *key, uint keylen);
+extern my_bool ac_trie_prepare (TRIE *trie);
+extern void ac_trie_init (TRIE *trie, AC_TRIE_STATE *state);
+
+
+/* `trie_goto' is internal function and shouldn't be used. */
+
+static inline TRIE_NODE *trie_goto (TRIE_NODE *root, TRIE_NODE *node, byte c)
+{
+ TRIE_NODE *next;
+ DBUG_ENTER("trie_goto");
+ for (next= node->links; next; next= next->next)
+ if (next->c == c)
+ DBUG_RETURN(next);
+ if (root == node)
+ DBUG_RETURN(root);
+ DBUG_RETURN(NULL);
+}
+
+
+/*
+ SYNOPSIS
+ int ac_trie_next (AC_TRIE_STATE *state, byte *c);
+ state - valid pointer to `AC_TRIE_STATE'
+ c - character to lookup
+
+ DESCRIPTION
+ Implementation of search using Aho-Corasick automaton.
+ Performs char-by-char search.
+
+ RETURN VALUE
+ `ac_trie_next' returns length of matched word or 0.
+*/
+
+static inline int ac_trie_next (AC_TRIE_STATE *state, byte *c)
+{
+ TRIE_NODE *root, *node;
+ DBUG_ENTER("ac_trie_next");
+ DBUG_ASSERT(state && c);
+ root= &state->trie->root;
+ node= state->node;
+ while (! (state->node= trie_goto(root, node, *c)))
+ node= node->fail;
+ DBUG_RETURN(state->node->leaf);
+}
+
+
+/*
+ SYNOPSIS
+ my_bool trie_search (TRIE *trie, const byte *key, uint keylen);
+ trie - valid pointer to `TRIE'
+ key - valid pointer to key to insert
+ keylen - non-0 key length
+
+ DESCRIPTION
+ Performs key lookup in trie.
+
+ RETURN VALUE
+ `trie_search' returns `true' if key is in `trie'. Otherwise,
+ `false' is returned.
+
+ NOTES
+ Consecutive search here is "best by test". arrays are very short, so
+ binary search or hashing would add too much complexity that would
+ overweight speed gain. Especially because compiler can optimize simple
+ consecutive loop better (tested)
+*/
+
+static inline my_bool trie_search (TRIE *trie, const byte *key, uint keylen)
+{
+ TRIE_NODE *node;
+ uint k;
+ DBUG_ENTER("trie_search");
+ DBUG_ASSERT(trie && key && keylen);
+ node= &trie->root;
+
+ for (k= 0; k < keylen; k++)
+ {
+ byte p;
+ if (! (node= node->links))
+ DBUG_RETURN(FALSE);
+ p= key[k];
+ while (p != node->c)
+ if (! (node= node->next))
+ DBUG_RETURN(FALSE);
+ }
+
+ DBUG_RETURN(node->leaf > 0);
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif