diff options
Diffstat (limited to 'myisam/ft_boolean_search.c')
-rw-r--r-- | myisam/ft_boolean_search.c | 55 |
1 file changed, 43 insertions, 12 deletions
diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index c3883ed0961..a026a8613dc 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -77,7 +77,7 @@ typedef struct st_ftb_word { my_off_t docid[2]; /* for index search and for scan */ uint ndepth; int len; - /* ... there can be docid cache added here. SerG */ + /* ... docid cache can be added here. SerG */ byte word[1]; } FTB_WORD; @@ -90,6 +90,7 @@ typedef struct st_ft_info { uint with_scan; FTB_EXPR *root; QUEUE queue; + TREE no_dupes; FTB_WORD **list; MEM_ROOT mem_root; } FTB; @@ -174,7 +175,12 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, return; } -void _ftb_init_index_search(FT_INFO *ftb) +static int _ftb_no_dupes_cmp(void* not_used, const void *a,const void *b) +{ + return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b))); +} + +void _ftb_init_index_search(FT_INFO *ftb) { int i, r; FTB_WORD *ftbw; @@ -193,16 +199,31 @@ void _ftb_init_index_search(FT_INFO *ftb) { ftbw=(FTB_WORD *)(ftb->queue.root[i]); - if (ftbw->flags&FTB_FLAG_TRUNC && - (ftbw->up->ythresh > test(ftbw->flags&FTB_FLAG_YES))) - { - /* no need to search for this prefix in the index - - * it cannot ADD new matches, and to REMOVE half-matched - * rows we do scan anyway */ - ftbw->docid[0]=HA_POS_ERROR; - ftbw->up->yweaks++; - continue; - } + if (ftbw->flags&FTB_FLAG_TRUNC) /* special treatment :(( */ + if (ftbw->up->ythresh > test(ftbw->flags&FTB_FLAG_YES)) + { + /* no need to search for this prefix in the index - + * it cannot ADD new matches, and to REMOVE half-matched + * rows we do scan anyway */ + ftbw->docid[0]=HA_POS_ERROR; + ftbw->up->yweaks++; + continue; + } + else + { + /* We have to index-search for this prefix. + * It may cause duplicates, as in the index (sorted by <word,docid>) + * <aaaa,row1> + * <aabb,row2> + * <aacc,row1> + * Searching for "aa*" will find row1 twice... 
+ */ + if (!is_tree_inited(& ftb->no_dupes)) + { + init_tree(& ftb->no_dupes,0,0,sizeof(my_off_t), + _ftb_no_dupes_cmp,0,0,0); + } + } r=_mi_search(info, keyinfo, (uchar*) ftbw->word, ftbw->len, SEARCH_FIND | SEARCH_BIGGER, keyroot); @@ -250,6 +271,7 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, default_charset_info : info->s->keyinfo[keynr].seg->charset); ftb->with_scan=0; + bzero(& ftb->no_dupes, sizeof(TREE)); init_alloc_root(&ftb->mem_root, 1024, 1024); @@ -438,6 +460,11 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos) { /* curdoc matched ! */ + if (is_tree_inited(& ftb->no_dupes) && + tree_insert(& ftb->no_dupes, &curdoc, 0)->count >1) + /* but it managed to get past this line once */ + continue; + info->lastpos=curdoc; info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); /* why is this ? */ @@ -523,6 +550,10 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) void ft_boolean_close_search(FT_INFO *ftb) { + if (is_tree_inited(& ftb->no_dupes)) + { + delete_tree(& ftb->no_dupes); + } free_root(& ftb->mem_root, MYF(0)); my_free((gptr)ftb,MYF(0)); } |