diff options
Diffstat (limited to 'myisam/ft_boolean_search.c')
-rw-r--r-- | myisam/ft_boolean_search.c | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c new file mode 100644 index 00000000000..32dd428e493 --- /dev/null +++ b/myisam/ft_boolean_search.c @@ -0,0 +1,222 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +#include "ftdefs.h" + +/* search with boolean queries */ + +typedef struct st_all_in_one { + MI_INFO *info; + uint keynr; + uchar *keybuff; + MI_KEYDEF *keyinfo; + my_off_t key_root; + TREE dtree; + byte *start, *end; + uint total_yes, total_no; +} ALL_IN_ONE; + +typedef struct st_ft_superdoc { + FT_DOC doc; + //FT_WORD *word_ptr; + //double tmp_weight; + uint yes; + uint no; + uint wno; + ALL_IN_ONE *aio; +} FT_SUPERDOC; + +static int FT_SUPERDOC_cmp(FT_SUPERDOC *p1, FT_SUPERDOC *p2) +{ + if (p1->doc.dpos < p2->doc.dpos) + return -1; + if (p1->doc.dpos == p2->doc.dpos) + return 0; + return 1; +} + +static int walk_and_copy(FT_SUPERDOC *from, + uint32 count __attribute__((unused)), FT_DOC **to) +{ + if (from->yes == from->aio->total_yes && !from->no) + { + (*to)->dpos=from->doc.dpos; + (*to)->weight=from->doc.weight; + (*to)++; + } + return 0; +} + +static double _wghts[11]={ + 0.131687242798354, + 0.197530864197531, + 0.296296296296296, + 0.444444444444444, + 0.666666666666667, + 1.000000000000000, + 1.500000000000000, + 2.250000000000000, + 3.375000000000000, + 5.062500000000000, + 7.593750000000000}; +static double *wghts=_wghts+5; // wghts[i] = 1.5**i + +static double _nwghts[11]={ + -0.065843621399177, + -0.098765432098766, + -0.148148148148148, + -0.222222222222222, + -0.333333333333334, + -0.500000000000000, + -0.750000000000000, + -1.125000000000000, + -1.687500000000000, + -2.531250000000000, + -3.796875000000000}; +static double *nwghts=_nwghts+5; // nwghts[i] = -0.5*1.5**i + +int do_boolean(ALL_IN_ONE *aio, uint nested, + int yesno, int plusminus, bool pmsign) +{ + int r, res; + uint keylen, wno; + FT_SUPERDOC sdoc, *sptr; + TREE_ELEMENT *selem; + FT_WORD w; + FTB_PARAM param; + +#ifdef EVAL_RUN + return 1; +#endif /* EVAL_RUN */ + + param.prev=' '; + + for(wno=1; res=ft_get_word(&aio->start,aio->end,&w,¶m); wno++) + { + r=plusminus+param.plusminus; + if (param.pmsign^pmsign) + w.weight=nwghts[(r>5)?5:((r<-5)?-5:r)]; + else + w.weight=wghts[(r>5)?5:((r<-5)?-5:r)]; + + if (param.yesno>0) aio->total_yes++; + if (param.yesno<0) aio->total_no++; + + switch (res) { + case FTB_LBR: // ( + //if (do_boolean(aio,nested+1,my_yesno,plusminus+my_plusminus)) + // return 1; + // ??? + break; + case 1: // word + keylen=_ft_make_key(aio->info,aio->keynr,(char*) aio->keybuff,&w,0); + keylen-=HA_FT_WLEN; + + r=_mi_search(aio->info, aio->keyinfo, aio->keybuff, keylen, + SEARCH_FIND | SEARCH_PREFIX, aio->key_root); + + while (!r) + { + if (param.trunc) + r=_mi_compare_text(default_charset_info, + aio->info->lastkey+1,keylen-1, + aio->keybuff+1,keylen-1,0); + else + r=_mi_compare_text(default_charset_info, + aio->info->lastkey,keylen, + aio->keybuff,keylen,0); + if (r) break; + + sdoc.doc.dpos=aio->info->lastpos; + + /* saving document matched into dtree */ + if (!(selem=tree_insert(&aio->dtree, &sdoc, 0))) return 1; + + sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem); + + if (selem->count==1) /* document's first match */ + { + sptr->yes=sptr->no=sptr->doc.weight=0; + sptr->aio=aio; + sptr->wno=0; + } + if (sptr->wno != wno) + { + if (param.yesno>0) sptr->yes++; + if (param.yesno<0) sptr->no++; + sptr->wno=wno; + } + sptr->doc.weight+=w.weight; + + if (_mi_test_if_changed(aio->info) == 0) + r=_mi_search_next(aio->info, aio->keyinfo, aio->info->lastkey, + aio->info->lastkey_length, SEARCH_BIGGER, + aio->key_root); + else + r=_mi_search(aio->info, aio->keyinfo, aio->info->lastkey, + aio->info->lastkey_length, SEARCH_BIGGER, + aio->key_root); + } + break; + case FTB_RBR: // ) + break; + } + } + return 0; +} + +FT_DOCLIST *ft_boolean_search(MI_INFO *info, uint keynr, byte *query, + uint query_len) +{ + ALL_IN_ONE aio; + FT_DOC *dptr; + FT_DOCLIST *dlist=NULL; + + aio.info=info; + aio.keynr=keynr; + aio.keybuff=aio.info->lastkey+aio.info->s->base.max_key_length; + aio.keyinfo=aio.info->s->keyinfo+keynr; + aio.key_root=aio.info->s->state.key_root[keynr]; + aio.start=query; + aio.end=query+query_len; + aio.total_yes=aio.total_no=0; + + init_tree(&aio.dtree,0,sizeof(FT_SUPERDOC),(qsort_cmp)&FT_SUPERDOC_cmp,0, + NULL); + + if (do_boolean(&aio,0,0,0,0)) + goto err; + + dlist=(FT_DOCLIST *)my_malloc(sizeof(FT_DOCLIST)+sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1),MYF(0)); + if(!dlist) + goto err; + + dlist->ndocs=aio.dtree.elements_in_tree; + dlist->curdoc=-1; + dlist->info=aio.info; + dptr=dlist->doc; + + tree_walk(&aio.dtree, (tree_walk_action)&walk_and_copy, &dptr, left_root_right); + + dlist->ndocs=dptr - dlist->doc; + +err: + delete_tree(&aio.dtree); + return dlist; +} + |