diff options
Diffstat (limited to 'myisam')
-rw-r--r-- | myisam/Makefile.am | 2 | ||||
-rw-r--r-- | myisam/ft_boolean_search.c | 422 | ||||
-rw-r--r-- | myisam/ft_dump.c | 4 | ||||
-rw-r--r-- | myisam/ft_eval.c | 8 | ||||
-rw-r--r-- | myisam/ft_nlq_search.c | 127 | ||||
-rw-r--r-- | myisam/ft_parser.c | 2 | ||||
-rw-r--r-- | myisam/ft_search.c | 74 | ||||
-rw-r--r-- | myisam/ft_static.c | 21 | ||||
-rw-r--r-- | myisam/ft_test1.c | 8 | ||||
-rw-r--r-- | myisam/ftdefs.h | 23 | ||||
-rw-r--r-- | myisam/mi_search.c | 122 | ||||
-rw-r--r-- | myisam/mi_write.c | 10 |
12 files changed, 503 insertions, 320 deletions
diff --git a/myisam/Makefile.am b/myisam/Makefile.am index 6781116043f..d86462e9b84 100644 --- a/myisam/Makefile.am +++ b/myisam/Makefile.am @@ -45,7 +45,7 @@ libmyisam_a_SOURCES = mi_open.c mi_extra.c mi_info.c mi_rkey.c \ mi_range.c mi_dbug.c mi_checksum.c mi_log.c \ mi_changed.c mi_static.c mi_delete_all.c \ mi_delete_table.c mi_rename.c mi_check.c \ - ft_parser.c ft_search.c ft_stopwords.c ft_static.c \ + ft_parser.c ft_stopwords.c ft_static.c \ ft_update.c ft_boolean_search.c ft_nlq_search.c sort.c CLEANFILES = test?.MY? FT?.MY? isam.log mi_test_all DEFS = -DMAP_TO_USE_RAID diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index 8f3138af989..00f3ac8ff0a 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -16,53 +16,12 @@ /* Written by Sergei A. Golubchik, who has a shared copyright to this code */ +#define FT_CORE #include "ftdefs.h" +#include <queues.h> /* search with boolean queries */ -typedef struct st_all_in_one { - MI_INFO *info; - uint keynr; - uchar *keybuff; - MI_KEYDEF *keyinfo; - my_off_t key_root; - TREE dtree; - byte *start, *end; - uint total_yes, total_no; -} ALL_IN_ONE; - -typedef struct st_ft_superdoc { - FT_DOC doc; - //FT_WORD *word_ptr; - //double tmp_weight; - uint yes; - uint no; - uint wno; - ALL_IN_ONE *aio; -} FT_SUPERDOC; - -static int FT_SUPERDOC_cmp(void* cmp_arg __attribute__((unused)), - FT_SUPERDOC *p1, FT_SUPERDOC *p2) -{ - if (p1->doc.dpos < p2->doc.dpos) - return -1; - if (p1->doc.dpos == p2->doc.dpos) - return 0; - return 1; -} - -static int walk_and_copy(FT_SUPERDOC *from, - uint32 count __attribute__((unused)), FT_DOC **to) -{ - if (from->yes == from->aio->total_yes && !from->no) - { - (*to)->dpos=from->doc.dpos; - (*to)->weight=from->doc.weight; - (*to)++; - } - return 0; -} - static double _wghts[11]={ 0.131687242798354, 0.197530864197531, @@ -91,136 +50,301 @@ static double _nwghts[11]={ -3.796875000000000}; static double *nwghts=_nwghts+5; // nwghts[i] = -0.5*1.5**i -int do_boolean(ALL_IN_ONE *aio, uint nested __attribute__((unused)), - int yesno __attribute__((unused)), - int plusminus, bool pmsign) +typedef struct st_ftb_expr FTB_EXPR; +struct st_ftb_expr { + FTB_EXPR *up; + float weight; + int yesno; + my_off_t docid; + float cur_weight; + int yesses; /* number of "yes" words matched */ + int nos; /* number of "no" words matched */ + int ythresh; /* number of "yes" words in expr */ +}; + +typedef struct { + FTB_EXPR *up; + float weight; + int yesno; + int trunc; + my_off_t docid; + uint ndepth; + int len; + /* ... there can be docid cache added here. SerG */ + byte word[1]; +} FTB_WORD; + +typedef struct st_ft_info { + struct _ft_vft *please; + MI_INFO *info; + uint keynr; + int ok; + FTB_EXPR *root; + QUEUE queue; + MEM_ROOT mem_root; +} FTB; + +int FTB_WORD_cmp(void *v, byte *a, byte *b) { - int r, res; - uint keylen, wno; - FT_SUPERDOC sdoc, *sptr; - TREE_ELEMENT *selem; - FT_WORD w; - FTB_PARAM param; + /* ORDER BY docid, ndepth DESC */ + int i=CMP(((FTB_WORD *)a)->docid, ((FTB_WORD *)b)->docid); + if (!i) + i=CMP(((FTB_WORD *)b)->ndepth,((FTB_WORD *)a)->ndepth); + return i; +} -#ifdef EVAL_RUN - return 1; -#endif /* EVAL_RUN */ +void _ftb_parse_query(FTB *ftb, byte **start, byte *end, + FTB_EXPR *up, uint ndepth, uint depth) +{ + byte res; + FTB_PARAM param; + FT_WORD w; + FTB_WORD *ftbw; + FTB_EXPR *ftbe; + MI_INFO *info=ftb->info; + int r; + MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr; + my_off_t keyroot=info->s->state.key_root[ftb->keynr]; + uint extra=HA_FT_WLEN+info->s->rec_reflength; /* just a shortcut */ + + if (! ftb->ok) + return; param.prev=' '; - - for(wno=1; (res=ft_get_word(&aio->start,aio->end,&w,¶m)); wno++) + while (res=ft_get_word(start,end,&w,¶m)) { - r=plusminus+param.plusminus; - if (param.pmsign^pmsign) - w.weight=nwghts[(r>5)?5:((r<-5)?-5:r)]; - else - w.weight=wghts[(r>5)?5:((r<-5)?-5:r)]; - - if (param.yesno>0) aio->total_yes++; - if (param.yesno<0) aio->total_no++; - + byte r=param.plusminus; + float weight=(param.pmsign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)]; switch (res) { - case FTB_LBR: // ( - //if (do_boolean(aio,nested+1,my_yesno,plusminus+my_plusminus)) - // return 1; - // ??? - break; - case 1: // word - keylen=_ft_make_key(aio->info,aio->keynr,(char*) aio->keybuff,&w,0); - keylen-=HA_FT_WLEN; - - r=_mi_search(aio->info, aio->keyinfo, aio->keybuff, keylen, - SEARCH_FIND | SEARCH_PREFIX, aio->key_root); - - while (!r) - { - if (param.trunc) - r=_mi_compare_text(default_charset_info, - aio->info->lastkey+1,keylen-1, - aio->keybuff+1,keylen-1,0); + case FTB_LBR: + ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); + ftbe->yesno=param.yesno; + ftbe->weight=weight; + ftbe->up=up; + ftbe->ythresh=0; + ftbe->docid=HA_POS_ERROR; + if (ftbw->yesno > 0) up->ythresh++; + _ftb_parse_query(ftb, start, end, ftbe, depth+1, + (param.yesno<0 ? depth+1 : ndepth)); + break; + case FTB_RBR: + return; + case 1: + ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root, + sizeof(FTB_WORD) + (param.trunc ? MI_MAX_KEY_BUFF : w.len+extra)); + ftbw->len=w.len + !param.trunc; + ftbw->yesno=param.yesno; + ftbw->trunc=param.trunc; /* 0 or 1 */ + ftbw->weight=weight; + ftbw->up=up; + ftbw->docid=HA_POS_ERROR; + ftbw->ndepth= param.yesno<0 ? depth : ndepth; + memcpy(ftbw->word+1, w.pos, w.len); + ftbw->word[0]=w.len; + if (ftbw->yesno > 0) up->ythresh++; + /*****************************************/ + r=_mi_search(info, keyinfo, ftbw->word, ftbw->len, + SEARCH_FIND | SEARCH_PREFIX, keyroot); + if (!r) + { + r=_mi_compare_text(default_charset_info, + info->lastkey+ftbw->trunc,ftbw->len, + ftbw->word+ftbw->trunc,ftbw->len,0); + } + if (r) /* not found */ + { + if (ftbw->yesno>0 && ftbw->up->up==0) + { /* this word MUST BE present in every document returned, + so we can abort the search right now */ + ftb->ok=0; + return; + } + } else - r=_mi_compare_text(default_charset_info, - aio->info->lastkey,keylen, - aio->keybuff,keylen,0); - if (r) break; - - sdoc.doc.dpos=aio->info->lastpos; + { + memcpy(ftbw->word, info->lastkey, info->lastkey_length); + ftbw->docid=info->lastpos; + queue_insert(& ftb->queue, (byte *)ftbw); + } + /*****************************************/ + break; + } + } + return; +} - /* saving document matched into dtree */ - if (!(selem=tree_insert(&aio->dtree, &sdoc, 0))) return 1; +FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, + uint query_len, my_bool presort __attribute__((unused))) +{ + FTB *ftb; + FTB_EXPR *ftbe; + uint res; - sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem); + if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME)))) + return 0; + ftb->please=& _ft_vft_boolean; + ftb->ok=1; + ftb->info=info; + ftb->keynr=keynr; + + init_alloc_root(&ftb->mem_root, 1024, 1024); + + /* hack: instead of init_queue, we'll use reinit queue to be able + * to alloc queue with alloc_root() + */ + res=ftb->queue.max_elements=query_len/(ft_min_word_len+1); + ftb->queue.root=(byte **)alloc_root(&ftb->mem_root, (res+1)*sizeof(void*)); + reinit_queue(& ftb->queue, res, 0, 0, FTB_WORD_cmp, ftb); + ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); + ftbe->weight=ftbe->yesno=ftbe->nos=1; + ftbe->up=0; + ftbe->ythresh=0; + ftbe->docid=HA_POS_ERROR; + ftb->root=ftbe; + _ftb_parse_query(ftb, &query, query+query_len, ftbe, 0, 0); + return ftb; +} - if (selem->count==1) /* document's first match */ +int ft_boolean_read_next(FT_INFO *ftb, char *record) +{ + FTB_EXPR *ftbe, *up; + FTB_WORD *ftbw; + MI_INFO *info=ftb->info; + MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr; + my_off_t keyroot=info->s->state.key_root[ftb->keynr]; + my_off_t curdoc; + int r; + + /* black magic ON */ + if ((int) _mi_check_index(info, ftb->keynr) < 0) + return my_errno; + if (_mi_readinfo(info, F_RDLCK, 1)) + return my_errno; + /* black magic OFF */ + + if (!ftb->queue.elements) + return my_errno=HA_ERR_END_OF_FILE; + + while(ftb->ok && + (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid) != HA_POS_ERROR) + { + while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid) + { + float weight=ftbw->weight; + int yn=ftbw->yesno; + for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) + { + if (ftbe->docid != curdoc) { - sptr->yes=0; - sptr->no=0; - sptr->doc.weight=0; - sptr->aio=aio; - sptr->wno=0; + ftbe->cur_weight=ftbe->yesses=ftbe->nos=0; + ftbe->docid=curdoc; } - if (sptr->wno != wno) + if (yn>0) { - if (param.yesno>0) sptr->yes++; - if (param.yesno<0) sptr->no++; - sptr->wno=wno; + ftbe->cur_weight+=weight; + if (++ftbe->yesses >= ftbe->ythresh && !ftbe->nos) + { + yn=ftbe->yesno; + weight=ftbe->cur_weight*ftbe->weight; + } + else + break; + } + else + if (yn<0) + { + /* NOTE: special sort function of queue assures that all yn<0 + * events for every particular subexpression will + * "auto-magically" happen BEFORE all yn>=0 events. So no + * already matched expression can become not-matched again. + */ + ++ftbe->nos; + break; } - sptr->doc.weight+=w.weight; - - if (_mi_test_if_changed(aio->info) == 0) - r=_mi_search_next(aio->info, aio->keyinfo, aio->info->lastkey, - aio->info->lastkey_length, SEARCH_BIGGER, - aio->key_root); else - r=_mi_search(aio->info, aio->keyinfo, aio->info->lastkey, - USE_WHOLE_KEY, SEARCH_BIGGER, - aio->key_root); + /* if (yn==0) */ + { + if (ftbe->yesses >= ftbe->ythresh && !ftbe->nos) + { + yn=ftbe->yesno; + ftbe->cur_weight=weight; + weight*=ftbe->weight; + } + else + { + ftbe->cur_weight+=weight; + break; + } + } + } + /* update queue */ + r=_mi_search(info, keyinfo, ftbw->word, ftbw->len, + SEARCH_BIGGER , keyroot); + if (!r) + { + r=_mi_compare_text(default_charset_info, + info->lastkey+ftbw->trunc,ftbw->len, + ftbw->word+ftbw->trunc,ftbw->len,0); + } + if (r) /* not found */ + { + ftbw->docid=HA_POS_ERROR; + if (ftbw->yesno>0 && ftbw->up->up==0) + { /* this word MUST BE present in every document returned, + so we can stop the search right now */ + ftb->ok=0; + } + } + else + { + memcpy(ftbw->word, info->lastkey, info->lastkey_length); + ftbw->docid=info->lastpos; + } + queue_replaced(& ftb->queue); + } + + ftbe=ftb->root; + if (ftbe->cur_weight>0 && ftbe->yesses>=ftbe->ythresh && !ftbe->nos) + { + /* curdoc matched ! */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); /* why is this ? */ + + /* info->lastpos=curdoc; */ /* do I need this ? */ + if (!(*info->read_record)(info,curdoc,record)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + return 0; } - break; - case FTB_RBR: // ) - break; + return my_errno; } } - return 0; + return my_errno=HA_ERR_END_OF_FILE; } -FT_DOCLIST *ft_boolean_search(MI_INFO *info, uint keynr, byte *query, - uint query_len) +float ft_boolean_find_relevance(FT_INFO *ftb, my_off_t docid) { - ALL_IN_ONE aio; - FT_DOC *dptr; - FT_DOCLIST *dlist=NULL; - - aio.info=info; - aio.keynr=keynr; - aio.keybuff=aio.info->lastkey+aio.info->s->base.max_key_length; - aio.keyinfo=aio.info->s->keyinfo+keynr; - aio.key_root=aio.info->s->state.key_root[keynr]; - aio.start=query; - aio.end=query+query_len; - aio.total_yes=aio.total_no=0; - - init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0, - NULL, NULL); - - if (do_boolean(&aio,0,0,0,0)) - goto err; - - dlist=(FT_DOCLIST *)my_malloc(sizeof(FT_DOCLIST)+sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1),MYF(0)); - if(!dlist) - goto err; + fprintf(stderr, "ft_boolean_find_relevance called!\n"); + return -1.0; /* to be done via str scan */ +} - dlist->ndocs=aio.dtree.elements_in_tree; - dlist->curdoc=-1; - dlist->info=aio.info; - dptr=dlist->doc; +void ft_boolean_close_search(FT_INFO *ftb) +{ + free_root(& ftb->mem_root, MYF(0)); + my_free((gptr)ftb,MYF(0)); +} - tree_walk(&aio.dtree, (tree_walk_action)&walk_and_copy, &dptr, left_root_right); +float ft_boolean_get_relevance(FT_INFO *ftb) +{ + return ftb->root->cur_weight; +} - dlist->ndocs=dptr - dlist->doc; +my_off_t ft_boolean_get_docid(FT_INFO *ftb) +{ + return HA_POS_ERROR; +} -err: - delete_tree(&aio.dtree); - return dlist; +void ft_boolean_reinit_search(FT_INFO *ftb) +{ + fprintf(stderr, "ft_boolean_reinit_search called!\n"); } diff --git a/myisam/ft_dump.c b/myisam/ft_dump.c index 42439085132..ee2c3be0710 100644 --- a/myisam/ft_dump.c +++ b/myisam/ft_dump.c @@ -77,7 +77,7 @@ int main(int argc,char *argv[]) ft_init_stopwords(ft_precompiled_stopwords); - result=ft_init_search(info,inx,query,strlen(query),1); + result=ft_nlq_init_search(info,inx,query,strlen(query),1); if(!result) goto err; @@ -87,7 +87,7 @@ int main(int argc,char *argv[]) for(i=0 ; i<result->ndocs ; i++) printf("%9qx %20.7f\n",result->doc[i].dpos,result->doc[i].weight); - ft_close_search(result); + ft_nlq_close_search(result); } else { diff --git a/myisam/ft_eval.c b/myisam/ft_eval.c index 9466104100a..1f4b0cb4563 100644 --- a/myisam/ft_eval.c +++ b/myisam/ft_eval.c @@ -83,23 +83,23 @@ int main(int argc,char *argv[]) for(i=1;create_record(record,qf);i++) { FT_DOCLIST *result; double w; int t,err; - result=ft_init_search(file,0,blob_record,(uint) strlen(blob_record),1); + result=ft_nlq_init_search(file,0,blob_record,(uint) strlen(blob_record),1); if(!result) { printf("Query %d failed with errno %3d\n",i,my_errno); goto err; } if (!silent) printf("Query %d. Found: %d.\n",i,result->ndocs); - for(j=0;(err=ft_read_next(result, read_record))==0;j++) { + for(j=0;(err=ft_nlq_read_next(result, read_record))==0;j++) { t=uint2korr(read_record); - w=ft_get_relevance(result); + w=ft_nlq_get_relevance(result); printf("%d %.*s %f\n",i,t,read_record+2,w); } if(err != HA_ERR_END_OF_FILE) { printf("ft_read_next %d failed with errno %3d\n",j,my_errno); goto err; } - ft_close_search(result); + ft_nlq_close_search(result); } if (mi_close(file)) goto err; diff --git a/myisam/ft_nlq_search.c b/myisam/ft_nlq_search.c index 644becb27ab..f0f878a7f16 100644 --- a/myisam/ft_nlq_search.c +++ b/myisam/ft_nlq_search.c @@ -16,10 +16,24 @@ /* Written by Sergei A. Golubchik, who has a shared copyright to this code */ +#define FT_CORE #include "ftdefs.h" /* search with natural language queries */ +typedef struct ft_doc_rec { + my_off_t dpos; + double weight; +} FT_DOC; + +struct st_ft_info { + struct _ft_vft *please; + MI_INFO *info; + int ndocs; + int curdoc; + FT_DOC doc[1]; +}; + typedef struct st_all_in_one { MI_INFO *info; uint keynr; @@ -126,7 +140,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) aio->key_root); else r=_mi_search(aio->info, aio->keyinfo, aio->info->lastkey, - USE_WHOLE_KEY, SEARCH_BIGGER, + aio->info->lastkey_length, SEARCH_BIGGER, aio->key_root); } if(doc_cnt) { @@ -147,19 +161,32 @@ static int walk_and_copy(FT_SUPERDOC *from, return 0; } -FT_DOCLIST *ft_nlq_search(MI_INFO *info, uint keynr, byte *query, - uint query_len) +static int FT_DOC_cmp(FT_DOC *a, FT_DOC *b) +{ + return sgn(b->weight - a->weight); +} + +FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, + uint query_len, my_bool presort) { TREE *wtree, allocated_wtree; - ALL_IN_ONE aio; + ALL_IN_ONE aio; FT_DOC *dptr; - FT_DOCLIST *dlist=NULL; + FT_INFO *dlist=NULL; + my_off_t saved_lastpos=info->lastpos; + +/* black magic ON */ + if ((int) (keynr = _mi_check_index(info,keynr)) < 0) + return NULL; + if (_mi_readinfo(info,F_RDLCK,1)) + return NULL; +/* black magic OFF */ aio.info=info; aio.keynr=keynr; - aio.keybuff=aio.info->lastkey+aio.info->s->base.max_key_length; - aio.keyinfo=aio.info->s->keyinfo+keynr; - aio.key_root=aio.info->s->state.key_root[keynr]; + aio.keybuff=info->lastkey+info->s->base.max_key_length; + aio.keyinfo=info->s->keyinfo+keynr; + aio.key_root=info->s->state.key_root[keynr]; bzero(&allocated_wtree,sizeof(allocated_wtree)); @@ -167,26 +194,96 @@ FT_DOCLIST *ft_nlq_search(MI_INFO *info, uint keynr, byte *query, NULL, NULL); if(!(wtree=ft_parse(&allocated_wtree,query,query_len))) - return NULL; + goto err; if(tree_walk(wtree, (tree_walk_action)&walk_and_match, &aio, - left_root_right)) - goto err; + left_root_right)) + goto err2; - dlist=(FT_DOCLIST *)my_malloc(sizeof(FT_DOCLIST)+sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1),MYF(0)); + dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+ + sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1),MYF(0)); if(!dlist) - goto err; + goto err2; + dlist->please=& _ft_vft_nlq; dlist->ndocs=aio.dtree.elements_in_tree; dlist->curdoc=-1; dlist->info=aio.info; dptr=dlist->doc; - tree_walk(&aio.dtree, (tree_walk_action)&walk_and_copy, &dptr, left_root_right); + tree_walk(&aio.dtree, (tree_walk_action)&walk_and_copy, + &dptr, left_root_right); -err: + if(presort) + qsort(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort_cmp)&FT_DOC_cmp); + +err2: delete_tree(wtree); delete_tree(&aio.dtree); + +err: + info->lastpos=saved_lastpos; return dlist; } +int ft_nlq_read_next(FT_INFO *handler, char *record) +{ + MI_INFO *info= (MI_INFO *) handler->info; + + if (++handler->curdoc >= handler->ndocs) + { + --handler->curdoc; + return HA_ERR_END_OF_FILE; + } + + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + info->lastpos=handler->doc[handler->curdoc].dpos; + if (!(*info->read_record)(info,info->lastpos,record)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + return 0; + } + return my_errno; +} + +float ft_nlq_find_relevance(FT_INFO *handler, my_off_t docid) +{ + int a,b,c; + FT_DOC *docs=handler->doc; + + // Assuming docs[] is sorted by dpos... + + for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2) + { + if (docs[c].dpos > docid) + b=c; + else + a=c; + } + if (docs[a].dpos == docid) + return docs[a].weight; + else + return 0.0; +} + +void ft_nlq_close_search(FT_INFO *handler) +{ + my_free((gptr)handler,MYF(0)); +} + +float ft_nlq_get_relevance(FT_INFO *handler) +{ + return handler->doc[handler->curdoc].weight; +} + +my_off_t ft_nlq_get_docid(FT_INFO *handler) +{ + return handler->doc[handler->curdoc].dpos; +} + +void ft_nlq_reinit_search(FT_INFO *handler) +{ + handler->curdoc=-1; +} + diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c index 83b0956a752..466f1dfe021 100644 --- a/myisam/ft_parser.c +++ b/myisam/ft_parser.c @@ -135,7 +135,7 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) if (true_word_char(*doc)) break; if (*doc == FTB_LBR || *doc == FTB_RBR) { - param->prev=' '; + /* param->prev=' '; */ *start=doc+1; return *doc; } diff --git a/myisam/ft_search.c b/myisam/ft_search.c deleted file mode 100644 index 6970fa2835d..00000000000 --- a/myisam/ft_search.c +++ /dev/null @@ -1,74 +0,0 @@ -/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ - -#include "ftdefs.h" - -/* queries myisam and returns list of documents matched */ - -static int FT_DOC_cmp(FT_DOC *a, FT_DOC *b) -{ - return sgn(b->weight - a->weight); -} - -FT_DOCLIST *ft_init_search(void *info, uint keynr, byte *query, - uint query_len, my_bool presort) -{ - FT_DOCLIST *dlist; - my_off_t saved_lastpos=((MI_INFO *)info)->lastpos; - -/* black magic ON */ - if ((int) (keynr = _mi_check_index((MI_INFO *)info,keynr)) < 0) - return NULL; - if (fast_mi_readinfo((MI_INFO *) info)) - return NULL; -/* black magic OFF */ - - if (is_boolean(query, query_len)) - dlist=ft_boolean_search(info,keynr,query,query_len); - else - dlist=ft_nlq_search(info,keynr,query,query_len); - - if(dlist && presort) - { - qsort(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort_cmp)&FT_DOC_cmp); - } - - ((MI_INFO *)info)->lastpos=saved_lastpos; - return dlist; -} - -int ft_read_next(FT_DOCLIST *handler, char *record) -{ - MI_INFO *info= (MI_INFO *) handler->info; - - if (++handler->curdoc >= handler->ndocs) - { - --handler->curdoc; - return HA_ERR_END_OF_FILE; - } - - info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); - - info->lastpos=handler->doc[handler->curdoc].dpos; - if (!(*info->read_record)(info,info->lastpos,record)) - { - info->update|= HA_STATE_AKTIV; /* Record is read */ - return 0; - } - return my_errno; -} diff --git a/myisam/ft_static.c b/myisam/ft_static.c index 09afadec23f..494d7638d67 100644 --- a/myisam/ft_static.c +++ b/myisam/ft_static.c @@ -43,10 +43,29 @@ const MI_KEYSEG ft_keysegs[FT_SEGS]={ }, #endif /* EVAL_RUN */ { - HA_FT_WTYPE, 7, 0, 0, 0, 0, HA_FT_WLEN, 0, 0, NULL + HA_FT_WTYPE, 7, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, 0, 0, NULL } }; +const struct _ft_vft _ft_vft_nlq = { + ft_nlq_read_next, ft_nlq_find_relevance, ft_nlq_close_search, + ft_nlq_get_relevance, ft_nlq_get_docid, ft_nlq_reinit_search +}; +const struct _ft_vft _ft_vft_boolean = { + ft_boolean_read_next, ft_boolean_find_relevance, ft_boolean_close_search, + ft_boolean_get_relevance, ft_boolean_get_docid, ft_boolean_reinit_search +}; + +FT_INFO *(*_ft_init_vft[2])(MI_INFO *, uint, byte *, uint, my_bool) = +{ ft_init_nlq_search, ft_init_boolean_search }; + +FT_INFO *ft_init_search(uint mode, void *info, uint keynr, + byte *query, uint query_len, my_bool presort) +{ + return (*_ft_init_vft[mode])((MI_INFO *)info, keynr, + query, query_len, presort); +} + const char *ft_precompiled_stopwords[] = { #ifdef COMPILE_STOPWORDS_IN diff --git a/myisam/ft_test1.c b/myisam/ft_test1.c index 5093b591fb2..61d9a50e9c3 100644 --- a/myisam/ft_test1.c +++ b/myisam/ft_test1.c @@ -137,7 +137,7 @@ static int run_test(const char *filename) printf("- Reading rows with key\n"); for (i=0 ; i < NQUERIES ; i++) { FT_DOCLIST *result; - result=ft_init_search(file,0,(char*) query[i],strlen(query[i]),1); + result=ft_nlq_init_search(file,0,(char*) query[i],strlen(query[i]),1); if(!result) { printf("Query %d: `%s' failed with errno %3d\n",i,query[i],my_errno); continue; @@ -145,7 +145,7 @@ static int run_test(const char *filename) printf("Query %d: `%s'. Found: %d. Top five documents:\n", i,query[i],result->ndocs); for(j=0;j<5;j++) { double w; int err; - err=ft_read_next(result, read_record); + err=ft_nlq_read_next(result, read_record); if(err==HA_ERR_END_OF_FILE) { printf("No more matches!\n"); break; @@ -153,7 +153,7 @@ static int run_test(const char *filename) printf("ft_read_next %d failed with errno %3d\n",j,my_errno); break; } - w=ft_get_relevance(result); + w=ft_nlq_get_relevance(result); if(key_field == FIELD_VARCHAR) { uint l; char *p; @@ -164,7 +164,7 @@ static int run_test(const char *filename) printf("%10.7f: %.*s\n",w,recinfo[1].length, recinfo[0].length+read_record); } - ft_close_search(result); + ft_nlq_close_search(result); } if (mi_close(file)) goto err; diff --git a/myisam/ftdefs.h b/myisam/ftdefs.h index 1a017d3c73a..9eedf57c759 100644 --- a/myisam/ftdefs.h +++ b/myisam/ftdefs.h @@ -95,9 +95,6 @@ extern ulong collstat; #define FTB_NEG '~' #define FTB_TRUNC '*' -// #define FTB_MAX_SUBEXPR 255 -// #define FTB_MAX_DEPTH 16 - typedef struct st_ft_word { byte * pos; uint len; @@ -116,7 +113,6 @@ typedef struct st_ftb_param { } FTB_PARAM; int is_stopword(char *word, uint len); -int is_boolean(byte *q, uint len); uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t); @@ -127,6 +123,21 @@ TREE * ft_parse(TREE *, byte *, int); FT_WORD * ft_linearize(MI_INFO *, uint, byte *, TREE *); FT_WORD * _mi_ft_parserecord(MI_INFO *, uint , byte *, const byte *); -FT_DOCLIST * ft_nlq_search(MI_INFO *, uint, byte *, uint); -FT_DOCLIST * ft_boolean_search(MI_INFO *, uint, byte *, uint); +const struct _ft_vft _ft_vft_nlq; +FT_INFO *ft_init_nlq_search(MI_INFO *, uint, byte *, uint, my_bool); +int ft_nlq_read_next(FT_INFO *, char *); +float ft_nlq_find_relevance(FT_INFO *, my_off_t ); +void ft_nlq_close_search(FT_INFO *); +float ft_nlq_get_relevance(FT_INFO *); +my_off_t ft_nlq_get_docid(FT_INFO *); +void ft_nlq_reinit_search(FT_INFO *); + +const struct _ft_vft _ft_vft_boolean; +FT_INFO *ft_init_boolean_search(MI_INFO *, uint, byte *, uint, my_bool); +int ft_boolean_read_next(FT_INFO *, char *); +float ft_boolean_find_relevance(FT_INFO *, my_off_t ); +void ft_boolean_close_search(FT_INFO *); +float ft_boolean_get_relevance(FT_INFO *); +my_off_t ft_boolean_get_docid(FT_INFO *); +void ft_boolean_reinit_search(FT_INFO *); diff --git a/myisam/mi_search.c b/myisam/mi_search.c index 96ddd9d789d..49777498405 100644 --- a/myisam/mi_search.c +++ b/myisam/mi_search.c @@ -19,8 +19,6 @@ #include "fulltext.h" #include "m_ctype.h" -#define CMP(a,b) (a<b ? -1 : a == b ? 0 : 1) - static my_bool _mi_get_prev_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key, uchar *keypos, uint *return_key_length); @@ -316,7 +314,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, while (page < end) { uint packed= *page & 128; - + vseg=page; if (keyinfo->seg->length >= 127) { @@ -332,7 +330,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, prefix_len=len; else { - prefix_len=suffix_len; + prefix_len=suffix_len; get_key_length(suffix_len,vseg); } } @@ -358,7 +356,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, for (keyseg=keyinfo->seg+1 ; keyseg->type ; keyseg++ ) { - + if (keyseg->flag & HA_NULL_PART) { if (!(*from++)) @@ -722,13 +720,14 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++) { uchar *end; + uint piks=! (keyseg->flag & HA_NO_SORT); (*diff_pos)++; /* Handle NULL part */ if (keyseg->null_bit) { key_length--; - if (*a != *b) + if (*a != *b && piks) { flag = (int) *a - (int) *b; return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); @@ -754,9 +753,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, get_key_pack_length(b_length,pack_length,b); next_key_length=key_length-b_length-pack_length; - if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + if (piks && + (flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a+=a_length; b+=b_length; @@ -772,9 +771,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, while (b_length && b[b_length-1] == ' ') b_length--; } - if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + if (piks && + (flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a=end; b+=length; @@ -788,9 +787,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, get_key_pack_length(b_length,pack_length,b); next_key_length=key_length-b_length-pack_length; - if ((flag=compare_bin(a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + if (piks && + (flag=compare_bin(a,a_length,b,b_length, + (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a+=a_length; b+=b_length; @@ -799,9 +798,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, else { uint length=keyseg->length; - if ((flag=compare_bin(a,length,b,length, - (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + if (piks && + (flag=compare_bin(a,length,b,length, + (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a+=length; b+=length; @@ -814,9 +813,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, get_key_pack_length(b_length,pack_length,b); next_key_length=key_length-b_length-pack_length; - if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + if (piks && + (flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a+=a_length; b+=b_length; @@ -830,9 +829,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, get_key_pack_length(b_length,pack_length,b); next_key_length=key_length-b_length-pack_length; - if ((flag=compare_bin(a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + if (piks && + (flag=compare_bin(a,a_length,b,b_length, + (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a+=a_length; b+=b_length; @@ -843,7 +842,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, { int i_1= (int) *((signed char*) a); int i_2= (int) *((signed char*) b); - if ((flag = CMP(i_1,i_2))) + if (piks && (flag = CMP(i_1,i_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b++; @@ -852,7 +851,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_SHORT_INT: s_1= mi_sint2korr(a); s_2= mi_sint2korr(b); - if ((flag = CMP(s_1,s_2))) + if (piks && (flag = CMP(s_1,s_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 2; /* sizeof(short int); */ @@ -862,7 +861,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, uint16 us_1,us_2; us_1= mi_sint2korr(a); us_2= mi_sint2korr(b); - if ((flag = CMP(us_1,us_2))) + if (piks && (flag = CMP(us_1,us_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+=2; /* sizeof(short int); */ @@ -871,7 +870,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_LONG_INT: l_1= mi_sint4korr(a); l_2= mi_sint4korr(b); - if ((flag = CMP(l_1,l_2))) + if (piks && (flag = CMP(l_1,l_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 4; /* sizeof(long int); */ @@ -879,7 +878,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_ULONG_INT: u_1= mi_sint4korr(a); u_2= mi_sint4korr(b); - if ((flag = CMP(u_1,u_2))) + if (piks && (flag = CMP(u_1,u_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 4; /* sizeof(long int); */ @@ -887,7 +886,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_INT24: l_1=mi_sint3korr(a); l_2=mi_sint3korr(b); - if ((flag = CMP(l_1,l_2))) + if (piks && (flag = CMP(l_1,l_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 3; @@ -895,7 +894,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_UINT24: l_1=mi_uint3korr(a); l_2=mi_uint3korr(b); - if ((flag = CMP(l_1,l_2))) + if (piks && (flag = CMP(l_1,l_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 3; @@ -903,7 +902,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_FLOAT: mi_float4get(f_1,a); mi_float4get(f_2,b); - if ((flag = CMP(f_1,f_2))) + if (piks && (flag = CMP(f_1,f_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 4; /* sizeof(float); */ @@ -911,7 +910,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_DOUBLE: mi_float8get(d_1,a); mi_float8get(d_2,b); - if ((flag = CMP(d_1,d_2))) + if (piks && (flag = CMP(d_1,d_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 8; /* sizeof(double); */ @@ -941,33 +940,40 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, for ( ; alength && *a == ' ' ; a++, alength--) ; for ( ; blength && *b == ' ' ; b++, blength--) ; } - - if (*a == '-') - { - if (*b != '-') - return -1; - a++; b++; - swap(uchar*,a,b); - swap(int,alength,blength); - swap_flag=1-swap_flag; - alength--; blength--; - end=a+alength; - } - else if (*b == '-') - return 1; - while (alength && (*a == '+' || *a == '0')) + if (piks) { - a++; alength--; + if (*a == '-') + { + if (*b != '-') + return -1; + a++; b++; + swap(uchar*,a,b); + swap(int,alength,blength); + swap_flag=1-swap_flag; + alength--; blength--; + end=a+alength; + } + else if (*b == '-') + return 1; + while (alength && (*a == '+' || *a == '0')) + { + a++; alength--; + } + while (blength && (*b == '+' || *b == '0')) + { + b++; blength--; + } + if (alength != blength) + return (alength < blength) ? -1 : 1; + while (a < end) + if (*a++ != *b++) + return ((int) a[-1] - (int) b[-1]); } - while (blength && (*b == '+' || *b == '0')) + else { - b++; blength--; + b+=(end-a); + a=end; } - if (alength != blength) - return (alength < blength) ? -1 : 1; - while (a < end) - if (*a++ != *b++) - return ((int) a[-1] - (int) b[-1]); if (swap_flag) /* Restore pointers */ swap(uchar*,a,b); @@ -979,7 +985,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, longlong ll_a,ll_b; ll_a= mi_sint8korr(a); ll_b= mi_sint8korr(b); - if ((flag = CMP(ll_a,ll_b))) + if (piks && (flag = CMP(ll_a,ll_b))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 8; @@ -990,7 +996,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, ulonglong ll_a,ll_b; ll_a= mi_uint8korr(a); ll_b= mi_uint8korr(b); - if ((flag = CMP(ll_a,ll_b))) + if (piks && (flag = CMP(ll_a,ll_b))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 8; diff --git a/myisam/mi_write.c b/myisam/mi_write.c index e3de13d2d9a..5c6db053829 100644 --- a/myisam/mi_write.c +++ b/myisam/mi_write.c @@ -778,7 +778,7 @@ static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param) keyinfo=param->info->s->keyinfo+param->keynr; keylen=_mi_keylength(keyinfo, key); memcpy(lastkey, key, keylen); - return _mi_ck_write_btree(param->info,param->keynr,lastkey, + return _mi_ck_write_btree(param->info,param->keynr,lastkey, keylen - param->info->s->rec_reflength); case free_end: if (param->info->s->concurrent_insert) @@ -798,7 +798,7 @@ int _mi_init_bulk_insert(MI_INFO *info) if (info->bulk_insert) return 0; - + for (i=num_keys=0 ; i < share->base.keys ; i++) { if (!(key[i].flag & HA_NOSAME) && share->base.auto_key != i+1 @@ -811,7 +811,7 @@ int _mi_init_bulk_insert(MI_INFO *info) if (!num_keys) return 0; - + info->bulk_insert=(TREE *) my_malloc((sizeof(TREE)*share->base.keys+ sizeof(bulk_insert_param)*num_keys),MYF(0)); @@ -826,13 +826,13 @@ int _mi_init_bulk_insert(MI_INFO *info) { params->info=info; params->keynr=i; - init_tree(& info->bulk_insert[i], 0, + init_tree(& info->bulk_insert[i], 0, myisam_bulk_insert_tree_size / num_keys, 0, (qsort_cmp2)keys_compare, 0, (tree_element_free) keys_free, (void *)params++); } else - info->bulk_insert[i].root=0; + info->bulk_insert[i].root=0; } return 0; |