BUG#37245 - Full text search problem

Certain boolean mode queries with the truncation operator did not return matching records and calculated relevancy incorrectly.
author: Sergey Vojtovich <svoj@mysql.com> 2008-11-28 18:17:13 +0400
committer: Sergey Vojtovich <svoj@mysql.com> 2008-11-28 18:17:13 +0400
commit: 85d4cbae24d0a6d86911a1845d0c3fe24f48c36e (patch)
tree: 7afb52b057d40a73647953f863304c5b5c8e4ff1 /myisam
parent: 17cd69ccf40824c89d4f3cdb0190006d81bbbcb0 (diff)
download: mariadb-git-85d4cbae24d0a6d86911a1845d0c3fe24f48c36e.tar.gz
1 files changed, 32 insertions, 6 deletions
diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c
index 57de75ee4be..255c51fd33a 100644
--- a/myisam/ft_boolean_search.c
+++ b/myisam/ft_boolean_search.c
@@ -122,11 +122,11 @@ static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
 
 static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
 {
-  /* ORDER BY word DESC, ndepth DESC */
-  int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1,
-                             (uchar*) (*a)->word+1,(*a)->len-1,0,0);
+  /* ORDER BY word, ndepth */
+  int i= mi_compare_text(cs, (uchar*) (*a)->word + 1, (*a)->len - 1,
+                             (uchar*) (*b)->word + 1, (*b)->len - 1, 0, 0);
   if (!i)
-    i=CMP_NUM((*b)->ndepth,(*a)->ndepth);
+    i= CMP_NUM((*a)->ndepth, (*b)->ndepth);
   return i;
 }
 
@@ -674,23 +674,49 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
                               (byte *) end, &word, TRUE))
     {
       int a, b, c;
+      /*
+        Find right-most element in the array of query words matching this
+        word from a document.
+      */
       for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2)
       {
         ftbw=ftb->list[c];
         if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len,
                             (uchar*) ftbw->word+1, ftbw->len-1,
-                            (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0) >0)
+                            (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0) < 0)
           b=c;
         else
           a=c;
       }
+      /*
+        If there were no words with truncation operator, we iterate to the
+        beginning of an array until array element is equal to the word from
+        a document. This is done mainly because the same word may be
+        mentioned twice (or more) in the query.
+
+        In case query has words with truncation operator we must iterate
+        to the beginning of the array. There may be non-matching query words
+        between matching word with truncation operator and the right-most
+        matching element. E.g., if we're looking for 'aaa15' in an array of
+        'aaa1* aaa14 aaa15 aaa16'.
+
+        Worse of that there still may be match even if the binary search
+        above didn't find matching element. E.g., if we're looking for
+        'aaa15' in an array of 'aaa1* aaa14 aaa16'. The binary search will
+        stop at 'aaa16'.
+      */
       for (; c>=0; c--)
       {
         ftbw=ftb->list[c];
         if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len,
                             (uchar*) ftbw->word+1,ftbw->len-1,
                             (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0))
-          break;
+        {
+          if (ftb->with_scan & FTB_FLAG_TRUNC)
+            continue;
+          else
+            break;
+        }
         if (ftbw->docid[1] == docid)
           continue;
         ftbw->docid[1]=docid;
author	Sergey Vojtovich <svoj@mysql.com>	2008-11-28 18:17:13 +0400
committer	Sergey Vojtovich <svoj@mysql.com>	2008-11-28 18:17:13 +0400
commit	85d4cbae24d0a6d86911a1845d0c3fe24f48c36e (patch)
tree	7afb52b057d40a73647953f863304c5b5c8e4ff1 /myisam
parent	17cd69ccf40824c89d4f3cdb0190006d81bbbcb0 (diff)
download	mariadb-git-85d4cbae24d0a6d86911a1845d0c3fe24f48c36e.tar.gz