summaryrefslogtreecommitdiff
path: root/myisam/ft_parser.c
diff options
context:
space:
mode:
authorunknown <monty@work.mysql.com>2001-04-11 13:04:03 +0200
committerunknown <monty@work.mysql.com>2001-04-11 13:04:03 +0200
commit8dd2e5b8d93d79965e833e3b979675240478c591 (patch)
tree9ad58a68370fc8feb8195b7b9c6423d58372093a /myisam/ft_parser.c
parent0c971641774f4d06f5442ef23af5d8c7ef9058ab (diff)
downloadmariadb-git-8dd2e5b8d93d79965e833e3b979675240478c591.tar.gz
Added all changes from old 4.0 version:
PSTACK, libmysqld and MySQL filesystem UPDATE ... ORDER BY DELETE ... ORDER BY New faster fulltext handling Faster compressed keys Makefile.am: Added support for pstack and libmysqld_dir acconfig.h: MySQL filesystem and PSTACK acinclude.m4: MySQL File system client/mysql.cc: Support for --xml configure.in: Pstack, MySQL FS and libmysqld_dir include/ft_global.h: Faster fulltext include/my_pthread.h: Made c++ safe include/myisam.h: Update for faster fulltext include/mysql_com.h: new my_net_read() include/violite.h: libmysqld libmysql/net.c: New protocol that supports big packets myisam/Makefile.am: Faster fulltext myisam/ft_parser.c: Faster fulltext myisam/ft_search.c: Faster fulltext myisam/ft_update.c: Faster fulltext myisam/ftdefs.h: Faster fulltext myisam/mi_check.c: Faster fulltext myisam/mi_open.c: Faster compressed keys myisam/mi_search.c: Faster compressed keys myisam/mi_update.c: Faster compressed keys myisam/myisamdef.h: Faster compressed keys myisam/sort.c: Faster compressed keys mysql-test/mysql-test-run.sh: --skip-innobase and --skip-bdb sql/ChangeLog: Changelog sql/Makefile.am: PSTACK sql/mysql_priv.h: New ORDER BY options and libmysqld sql/mysqld.cc: PSTACK sql/net_serv.cc: New protocol that supports big packets sql/share/estonian/errmsg.txt: New error messages sql/sql_base.cc: Better list_open_tabels sql/sql_delete.cc: ORDER BY for delete sql/sql_lex.cc: Added language convertation of all strings sql/sql_parse.cc: Changes for libmysqld Use new ORDER BY options sql/sql_show.cc: Character set convertations Use new list_open_tables function. sql/sql_update.cc: UPDATE ... ORDER BY sql/sql_yacc.yy: Clean up symbol definitions DELETE .. ORDER BY UPDATE .. ORDER BY sql/table.h: new OPEN_TABLE_LIST structure BitKeeper/etc/logging_ok: Logging to logging@openlogging.org accepted
Diffstat (limited to 'myisam/ft_parser.c')
-rw-r--r--myisam/ft_parser.c138
1 files changed, 115 insertions, 23 deletions
diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c
index 7ea2e240c36..d1daa581446 100644
--- a/myisam/ft_parser.c
+++ b/myisam/ft_parser.c
@@ -83,13 +83,12 @@ FT_WORD * ft_linearize(MI_INFO *info, uint keynr, byte *keybuf, TREE *wtree)
tree_walk(wtree,(tree_walk_action)&walk_and_copy,&docstat,left_root_right);
}
delete_tree(wtree);
- my_free((char*) wtree,MYF(0));
if (!wlist)
return NULL;
docstat.list->pos=NULL;
- for(p=wlist;p->pos;p++)
+ for (p=wlist;p->pos;p++)
{
p->weight=PRENORM_IN_USE;
#ifdef EVAL_RUN
@@ -104,7 +103,7 @@ FT_WORD * ft_linearize(MI_INFO *info, uint keynr, byte *keybuf, TREE *wtree)
#endif
#endif /* EVAL_RUN */
- for(p=wlist;p->pos;p++)
+ for (p=wlist;p->pos;p++)
{
p->weight/=NORM_IN_USE;
}
@@ -112,40 +111,133 @@ FT_WORD * ft_linearize(MI_INFO *info, uint keynr, byte *keybuf, TREE *wtree)
return wlist;
}
+#define true_word_char(X) (isalnum(X) || (X)=='_')
#ifdef HYPHEN_IS_DELIM
-#define word_char(X) (isalnum(X) || (X)=='_' || (X)=='\'')
+#define misc_word_char(X) ((X)=='\'')
#else
-#define word_char(X) (isalnum(X) || (X)=='_' || (X)=='\'' || (X)=='-')
+#define misc_word_char(X) ((X)=='\'' || (X)=='-')
#endif
+#define word_char(X) (true_word_char(X) || misc_word_char(X))
-/* this is rather dumb first version of the parser */
-TREE * ft_parse(TREE *wtree, byte *doc, int doclen)
+byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param)
{
- byte *end=doc+doclen;
- FT_WORD w;
+ byte *doc=*start;
+ int mwc;
+
+ param->yesno=param->plusminus=param->pmsign=0;
- if (!wtree)
+ while (doc<end)
{
- if (!(wtree=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) return NULL;
- init_tree(wtree,0,sizeof(FT_WORD),(qsort_cmp)&FT_WORD_cmp,0,NULL);
+ for (;doc<end;doc++)
+ {
+ if (true_word_char(*doc)) break;
+ if (*doc == FTB_LBR || *doc == FTB_RBR)
+ {
+ param->prev=' ';
+ *start=doc+1;
+ return *doc;
+ }
+ if (param->prev == ' ')
+ {
+ switch (*doc) {
+ case FTB_YES: param->yesno=+1; continue;
+ case FTB_NO: param->yesno=-1; continue;
+ case FTB_INC: param->plusminus++; continue;
+ case FTB_DEC: param->plusminus--; continue;
+ case FTB_NEG: param->pmsign=!param->pmsign; continue;
+ default: break;
+ }
+ }
+ param->prev=*doc;
+ param->yesno=param->plusminus=param->pmsign=0;
+ }
+
+ mwc=0;
+ for (word->pos=doc; doc<end; doc++)
+ if (true_word_char(*doc))
+ mwc=0;
+ else if (!misc_word_char(*doc) || mwc++)
+ break;
+
+ param->prev='A'; // be sure *prev is true_word_char
+ word->len= (uint)(doc-word->pos) - mwc;
+ if (param->trunc=(doc<end && *doc == FTB_TRUNC))
+ doc++;
+
+ if (word->len >= MIN_WORD_LEN && word->len < MAX_WORD_LEN &&
+ !is_stopword(word->pos, word->len))
+ {
+ *start=doc;
+ return 1;
+ }
}
+ return 0;
+}
+
+byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word)
+{
+ byte *doc=*start;
+ int mwc;
- w.weight=0;
while (doc<end)
{
for (;doc<end;doc++)
- if (word_char(*doc)) break;
- for (w.pos=doc; doc<end; doc++)
- if (!word_char(*doc)) break;
- if ((w.len= (uint) (doc-w.pos)) < MIN_WORD_LEN) continue;
- if (w.len >= HA_FT_MAXLEN) continue;
- if (is_stopword(w.pos, w.len)) continue;
- if (!tree_insert(wtree, &w, 0))
{
- delete_tree(wtree);
- my_free((char*) wtree,MYF(0));
- return NULL;
+ if (true_word_char(*doc)) break;
+ }
+
+ mwc=0;
+ for(word->pos=doc; doc<end; doc++)
+ if (true_word_char(*doc))
+ mwc=0;
+ else if (!misc_word_char(*doc) || mwc++)
+ break;
+
+ word->len= (uint)(doc-word->pos) - mwc;
+
+ if (word->len >= MIN_WORD_LEN && word->len < MAX_WORD_LEN &&
+ !is_stopword(word->pos, word->len))
+ {
+ *start=doc;
+ return 1;
}
}
+ return 0;
+}
+
+int is_boolean(byte *q, uint len)
+{
+ if (!len) return 0;
+ if (*q == FTB_YES || *q == FTB_NO) return 1;
+
+ for (++q; --len; ++q)
+ {
+ if ((*q == FTB_YES || *q == FTB_NO) && q[-1] == ' ' && true_word_char(q[1]))
+ return 1;
+ }
+ return 0;
+}
+
+TREE * ft_parse(TREE *wtree, byte *doc, int doclen)
+{
+ byte *end=doc+doclen;
+ int res;
+ FT_WORD w;
+
+ if (!is_tree_inited(wtree))
+ {
+ init_tree(wtree,0,sizeof(FT_WORD),(qsort_cmp)&FT_WORD_cmp,0,NULL);
+ }
+
+ while (res=ft_simple_get_word(&doc,end,&w))
+ {
+ if (!tree_insert(wtree, &w, 0))
+ goto err;
+ }
return wtree;
+
+err:
+ delete_tree(wtree);
+ return NULL;
}
+