diff options
author | Sergey Vojtovich <svoj@mysql.com> | 2008-11-28 18:17:13 +0400 |
---|---|---|
committer | Sergey Vojtovich <svoj@mysql.com> | 2008-11-28 18:17:13 +0400 |
commit | 85d4cbae24d0a6d86911a1845d0c3fe24f48c36e (patch) | |
tree | 7afb52b057d40a73647953f863304c5b5c8e4ff1 /myisam | |
parent | 17cd69ccf40824c89d4f3cdb0190006d81bbbcb0 (diff) | |
download | mariadb-git-85d4cbae24d0a6d86911a1845d0c3fe24f48c36e.tar.gz |
BUG#37245 - Full text search problem
Certain boolean mode queries with the truncation operator did
not return matching records and calculated relevancy
incorrectly.
Diffstat (limited to 'myisam')
-rw-r--r-- | myisam/ft_boolean_search.c | 38 |
1 files changed, 32 insertions, 6 deletions
diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index 57de75ee4be..255c51fd33a 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -122,11 +122,11 @@ static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b) static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) { - /* ORDER BY word DESC, ndepth DESC */ - int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, - (uchar*) (*a)->word+1,(*a)->len-1,0,0); + /* ORDER BY word, ndepth */ + int i= mi_compare_text(cs, (uchar*) (*a)->word + 1, (*a)->len - 1, + (uchar*) (*b)->word + 1, (*b)->len - 1, 0, 0); if (!i) - i=CMP_NUM((*b)->ndepth,(*a)->ndepth); + i= CMP_NUM((*a)->ndepth, (*b)->ndepth); return i; } @@ -674,23 +674,49 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) (byte *) end, &word, TRUE)) { int a, b, c; + /* + Find right-most element in the array of query words matching this + word from a document. + */ for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2) { ftbw=ftb->list[c]; if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len, (uchar*) ftbw->word+1, ftbw->len-1, - (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0) >0) + (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0) < 0) b=c; else a=c; } + /* + If there were no words with truncation operator, we iterate to the + beginning of an array until array element is equal to the word from + a document. This is done mainly because the same word may be + mentioned twice (or more) in the query. + + In case query has words with truncation operator we must iterate + to the beginning of the array. There may be non-matching query words + between matching word with truncation operator and the right-most + matching element. E.g., if we're looking for 'aaa15' in an array of + 'aaa1* aaa14 aaa15 aaa16'. + + Worse of that there still may be match even if the binary search + above didn't find matching element. 
E.g., if we're looking for + 'aaa15' in an array of 'aaa1* aaa14 aaa16'. The binary search will + stop at 'aaa16'. + */ for (; c>=0; c--) { ftbw=ftb->list[c]; if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len, (uchar*) ftbw->word+1,ftbw->len-1, (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0)) - break; + { + if (ftb->with_scan & FTB_FLAG_TRUNC) + continue; + else + break; + } if (ftbw->docid[1] == docid) continue; ftbw->docid[1]=docid; |