diff options
Diffstat (limited to 'myisam/ft_boolean_search.c')
-rw-r--r-- | myisam/ft_boolean_search.c | 121 |
1 files changed, 80 insertions, 41 deletions
diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index 6829ac95f1e..1a70113f0ad 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -24,7 +24,8 @@ /* search with boolean queries */ -static double _wghts[11]={ +static double _wghts[11]= +{ 0.131687242798354, 0.197530864197531, 0.296296296296296, @@ -38,7 +39,8 @@ static double _wghts[11]={ 7.593750000000000}; static double *wghts=_wghts+5; /* wghts[i] = 1.5**i */ -static double _nwghts[11]={ +static double _nwghts[11]= +{ -0.065843621399177, -0.098765432098766, -0.148148148148148, @@ -57,7 +59,8 @@ static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */ #define FTB_FLAG_NO 4 /* should NEVER be set both */ typedef struct st_ftb_expr FTB_EXPR; -struct st_ftb_expr { +struct st_ftb_expr +{ FTB_EXPR *up; byte *quot, *qend; float weight; @@ -70,7 +73,8 @@ struct st_ftb_expr { int yweaks; /* number of "yes" words for scan only */ }; -typedef struct st_ftb_word { +typedef struct st_ftb_word +{ FTB_EXPR *up; float weight; uint flags; @@ -81,7 +85,8 @@ typedef struct st_ftb_word { byte word[1]; } FTB_WORD; -typedef struct st_ft_info { +typedef struct st_ft_info +{ struct _ft_vft *please; MI_INFO *info; uint keynr; @@ -95,16 +100,22 @@ typedef struct st_ft_info { MEM_ROOT mem_root; } FTB; -int FTB_WORD_cmp(void *v __attribute__((unused)), FTB_WORD *a, FTB_WORD *b) +static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b) { + int i; + + /* if a==curdoc, take it as a < b */ + if (v && a->docid[0] == *v) + return -1; + /* ORDER BY docid, ndepth DESC */ - int i=CMP_NUM(a->docid[0], b->docid[0]); + i=CMP_NUM(a->docid[0], b->docid[0]); if (!i) i=CMP_NUM(b->ndepth,a->ndepth); return i; } -int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) +static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) { /* ORDER BY word DESC, ndepth DESC */ int i= mi_compare_text(cs, (*b)->word+1,(*b)->len-1, @@ -114,7 +125,7 @@ int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) return i; } -void _ftb_parse_query(FTB *ftb, byte **start, byte *end, +static void _ftb_parse_query(FTB *ftb, byte **start, byte *end, FTB_EXPR *up, uint depth) { byte res; @@ -176,12 +187,13 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, return; } -static int _ftb_no_dupes_cmp(void* not_used, const void *a,const void *b) +static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)), + const void *a,const void *b) { return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b))); } -void _ftb_init_index_search(FT_INFO *ftb) +static void _ftb_init_index_search(FT_INFO *ftb) { int i, r; FTB_WORD *ftbw; @@ -200,21 +212,23 @@ void _ftb_init_index_search(FT_INFO *ftb) { ftbw=(FTB_WORD *)(ftb->queue.root[i]); - if (ftbw->flags&FTB_FLAG_TRUNC) - /* special treatment for truncation operator :(( - 1. +trunc* and there're other (not +trunc*) words + if (ftbw->flags & FTB_FLAG_TRUNC) + { + /* + special treatment for truncation operator :(( + 1. +trunc* and there're other (not +trunc*) words | no need to search in the index, it can never ADD new rows | to the result, and to remove half-matched rows we do scan anyway - 2. -trunc* + 2. -trunc* | same as 1. - 3. trunc* + 3. trunc* | We have to index-search for this prefix. | It may cause duplicates, as in the index (sorted by <word,docid>) | <aaaa,row1> | <aabb,row2> | <aacc,row1> | Searching for "aa*" will find row1 twice... - */ + */ if ( test(ftbw->flags&FTB_FLAG_NO) || /* 2 */ (test(ftbw->flags&FTB_FLAG_YES) && /* 1 */ ftbw->up->ythresh - ftbw->up->yweaks >1)) /* 1 */ @@ -231,7 +245,7 @@ void _ftb_init_index_search(FT_INFO *ftb) _ftb_no_dupes_cmp,0,0,0); } } - + } r=_mi_search(info, keyinfo, (uchar*) ftbw->word, ftbw->len, SEARCH_FIND | SEARCH_BIGGER, keyroot); if (!r) @@ -246,8 +260,11 @@ void _ftb_init_index_search(FT_INFO *ftb) if (r) /* not found */ { if (ftbw->flags&FTB_FLAG_YES && ftbw->up->up==0) - { /* this word MUST BE present in every document returned, - so we can abort the search right now */ + { + /* + This word MUST BE present in every document returned, + so we can abort the search right now + */ ftb->state=INDEX_DONE; return; } @@ -261,8 +278,10 @@ void _ftb_init_index_search(FT_INFO *ftb) queue_fix(& ftb->queue); } + FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, - uint query_len, my_bool presort __attribute__((unused))) + uint query_len, + my_bool presort __attribute__((unused))) { FTB *ftb; FTB_EXPR *ftbe; @@ -282,13 +301,14 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, init_alloc_root(&ftb->mem_root, 1024, 1024); - /* hack: instead of init_queue, we'll use reinit queue to be able - * to alloc queue with alloc_root() - */ + /* + Hack: instead of init_queue, we'll use reinit queue to be able + to alloc queue with alloc_root() + */ res=ftb->queue.max_elements=1+query_len/(ft_min_word_len+1); ftb->queue.root=(byte **)alloc_root(&ftb->mem_root, (res+1)*sizeof(void*)); reinit_queue(& ftb->queue, res, 0, 0, - (int (*)(void*,byte*,byte*))FTB_WORD_cmp, ftb); + (int (*)(void*,byte*,byte*))FTB_WORD_cmp, 0); ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); ftbe->weight=1; ftbe->flags=FTB_FLAG_YES; @@ -309,8 +329,9 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, return ftb; } + /* returns 1 if str0 contain str1 */ -int _ftb_strstr(const byte *s0, const byte *e0, +static int _ftb_strstr(const byte *s0, const byte *e0, const byte *s1, const byte *e1, CHARSET_INFO *cs) { @@ -333,7 +354,8 @@ int _ftb_strstr(const byte *s0, const byte *e0, return 0; } -void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig) + +static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig) { FT_SEG_ITERATOR ftsi; FTB_EXPR *ftbe; @@ -381,28 +403,31 @@ void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig) else if (yn & FTB_FLAG_NO) { - /* NOTE: special sort function of queue assures that all - * (yn & FTB_FLAG_NO) != 0 - * events for every particular subexpression will - * "auto-magically" happen BEFORE all the - * (yn & FTB_FLAG_YES) != 0 events. So no - * already matched expression can become not-matched again. + /* + NOTE: special sort function of queue assures that all + (yn & FTB_FLAG_NO) != 0 + events for every particular subexpression will + "auto-magically" happen BEFORE all the + (yn & FTB_FLAG_YES) != 0 events. So no + already matched expression can become not-matched again. */ ++ftbe->nos; break; } else { - if (ftbe->ythresh) weight/=3; + if (ftbe->ythresh) + weight/=3; ftbe->cur_weight += weight; if (ftbe->yesses < ythresh) break; yn= (ftbe->yesses++ == ythresh) ? ftbe->flags : 0 ; - weight*=ftbe->weight; + weight*= ftbe->weight; } } } + int ft_boolean_read_next(FT_INFO *ftb, char *record) { FTB_EXPR *ftbe; @@ -426,8 +451,11 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) if (!ftb->queue.elements) return my_errno=HA_ERR_END_OF_FILE; + /* Attention!!! Address of a local variable is used here! See err: label */ + ftb->queue.first_cmp_arg=(void *)&curdoc; + while (ftb->state == INDEX_SEARCH && - (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) != + (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) != HA_POS_ERROR) { while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0]) @@ -450,8 +478,11 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) { ftbw->docid[0]=HA_POS_ERROR; if (ftbw->flags&FTB_FLAG_YES && ftbw->up->up==0) - { /* this word MUST BE present in every document returned, - so we can stop the search right now */ + { + /* + This word MUST BE present in every document returned, + so we can stop the search right now + */ ftb->state=INDEX_DONE; } } @@ -483,15 +514,20 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) info->update|= HA_STATE_AKTIV; /* Record is read */ if (ftb->with_scan && ft_boolean_find_relevance(ftb,record,0)==0) continue; /* no match */ - return 0; + my_errno=0; + goto err; } - return my_errno; + goto err; } } ftb->state=INDEX_DONE; - return my_errno=HA_ERR_END_OF_FILE; + my_errno=HA_ERR_END_OF_FILE; +err: + ftb->queue.first_cmp_arg=(void *)0; + return my_errno; } + float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) { FT_WORD word; @@ -558,6 +594,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) } } + void ft_boolean_close_search(FT_INFO *ftb) { if (is_tree_inited(& ftb->no_dupes)) @@ -568,11 +605,13 @@ void ft_boolean_close_search(FT_INFO *ftb) my_free((gptr)ftb,MYF(0)); } + float ft_boolean_get_relevance(FT_INFO *ftb) { return ftb->root->cur_weight; } + void ft_boolean_reinit_search(FT_INFO *ftb) { _ftb_init_index_search(ftb); |