summaryrefslogtreecommitdiff
path: root/myisam
diff options
context:
space:
mode:
authorSergey Vojtovich <svoj@mysql.com>2008-11-28 18:17:13 +0400
committerSergey Vojtovich <svoj@mysql.com>2008-11-28 18:17:13 +0400
commit589ac1cfdcb44818caff300777223eac723dd762 (patch)
tree7afb52b057d40a73647953f863304c5b5c8e4ff1 /myisam
parent78119b2d04457abe94f2a0a9f3d887c0eb9210a5 (diff)
downloadmariadb-git-589ac1cfdcb44818caff300777223eac723dd762.tar.gz
BUG#37245 - Full text search problem
Certain boolean mode queries with truncation operator did not return matching records and calculate relevancy incorrectly. myisam/ft_boolean_search.c: Sort ftb->list in ascending order. This helps to fix binary search in ft_boolean_find_relevance() without rewriting it much. Fixed binary search in ft_boolean_find_relevance(), so it finds right-most element in an array. Fixed that ft_boolean_find_relevance() didn't return match for words with truncation operator in case query has other non- matching words. mysql-test/r/fulltext.result: A test case for BUG#37245. mysql-test/t/fulltext.test: A test case for BUG#37245.
Diffstat (limited to 'myisam')
-rw-r--r--myisam/ft_boolean_search.c38
1 files changed, 32 insertions, 6 deletions
diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c
index 57de75ee4be..255c51fd33a 100644
--- a/myisam/ft_boolean_search.c
+++ b/myisam/ft_boolean_search.c
@@ -122,11 +122,11 @@ static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
{
- /* ORDER BY word DESC, ndepth DESC */
- int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1,
- (uchar*) (*a)->word+1,(*a)->len-1,0,0);
+ /* ORDER BY word, ndepth */
+ int i= mi_compare_text(cs, (uchar*) (*a)->word + 1, (*a)->len - 1,
+ (uchar*) (*b)->word + 1, (*b)->len - 1, 0, 0);
if (!i)
- i=CMP_NUM((*b)->ndepth,(*a)->ndepth);
+ i= CMP_NUM((*a)->ndepth, (*b)->ndepth);
return i;
}
@@ -674,23 +674,49 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
(byte *) end, &word, TRUE))
{
int a, b, c;
+ /*
+ Find right-most element in the array of query words matching this
+ word from a document.
+ */
for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2)
{
ftbw=ftb->list[c];
if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len,
(uchar*) ftbw->word+1, ftbw->len-1,
- (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0) >0)
+ (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0) < 0)
b=c;
else
a=c;
}
+ /*
+ If there were no words with truncation operator, we iterate to the
+ beginning of an array until array element is equal to the word from
+ a document. This is done mainly because the same word may be
+ mentioned twice (or more) in the query.
+
+ In case query has words with truncation operator we must iterate
+ to the beginning of the array. There may be non-matching query words
+ between matching word with truncation operator and the right-most
+ matching element. E.g., if we're looking for 'aaa15' in an array of
+ 'aaa1* aaa14 aaa15 aaa16'.
+
+ Worse of that there still may be match even if the binary search
+ above didn't find matching element. E.g., if we're looking for
+ 'aaa15' in an array of 'aaa1* aaa14 aaa16'. The binary search will
+ stop at 'aaa16'.
+ */
for (; c>=0; c--)
{
ftbw=ftb->list[c];
if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len,
(uchar*) ftbw->word+1,ftbw->len-1,
(my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0))
- break;
+ {
+ if (ftb->with_scan & FTB_FLAG_TRUNC)
+ continue;
+ else
+ break;
+ }
if (ftbw->docid[1] == docid)
continue;
ftbw->docid[1]=docid;