summaryrefslogtreecommitdiff
path: root/myisam/ft_boolean_search.c
diff options
context:
space:
mode:
Diffstat (limited to 'myisam/ft_boolean_search.c')
-rw-r--r--myisam/ft_boolean_search.c257
1 files changed, 166 insertions, 91 deletions
diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c
index 15a6348336d..0f7be592dd4 100644
--- a/myisam/ft_boolean_search.c
+++ b/myisam/ft_boolean_search.c
@@ -16,6 +16,8 @@
/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+/* TODO: add caching - pre-read several index entries at once */
+
#define FT_CORE
#include "ftdefs.h"
#include <queues.h>
@@ -78,7 +80,7 @@ typedef struct st_ft_info {
struct _ft_vft *please;
MI_INFO *info;
uint keynr;
- int ok;
+ enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE, SCAN } state;
FTB_EXPR *root;
QUEUE queue;
MEM_ROOT mem_root;
@@ -101,13 +103,9 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
FT_WORD w;
FTB_WORD *ftbw;
FTB_EXPR *ftbe;
- MI_INFO *info=ftb->info;
- int r;
- MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr;
- my_off_t keyroot=info->s->state.key_root[ftb->keynr];
- uint extra=HA_FT_WLEN+info->s->rec_reflength; /* just a shortcut */
+ uint extra=HA_FT_WLEN+ftb->info->s->rec_reflength; /* just a shortcut */
- if (! ftb->ok)
+ if (ftb->state != UNINITIALIZED)
return;
param.prev=' ';
@@ -132,7 +130,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
case 1:
ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root,
sizeof(FTB_WORD) + (param.trunc ? MI_MAX_KEY_BUFF : w.len+extra));
- ftbw->len=w.len + !param.trunc;
+ ftbw->len=w.len+1;
ftbw->yesno=param.yesno;
ftbw->trunc=param.trunc; /* 0 or 1 */
ftbw->weight=weight;
@@ -142,37 +140,55 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
memcpy(ftbw->word+1, w.pos, w.len);
ftbw->word[0]=w.len;
if (ftbw->yesno > 0) up->ythresh++;
- /*****************************************/
- r=_mi_search(info, keyinfo, ftbw->word, ftbw->len,
- SEARCH_FIND | SEARCH_PREFIX, keyroot);
- if (!r)
- {
- r=_mi_compare_text(default_charset_info,
- info->lastkey+ftbw->trunc,ftbw->len,
- ftbw->word+ftbw->trunc,ftbw->len,0);
- }
- if (r) /* not found */
- {
- if (ftbw->yesno>0 && ftbw->up->up==0)
- { /* this word MUST BE present in every document returned,
- so we can abort the search right now */
- ftb->ok=0;
- return;
- }
- }
- else
- {
- memcpy(ftbw->word, info->lastkey, info->lastkey_length);
- ftbw->docid=info->lastpos;
- queue_insert(& ftb->queue, (byte *)ftbw);
- }
- /*****************************************/
+ queue_insert(& ftb->queue, (byte *)ftbw);
break;
}
}
return;
}
+void _ftb_init_index_search(FT_INFO *ftb)
+{
+ int i, r;
+ FTB_WORD *ftbw;
+ MI_INFO *info=ftb->info;
+ MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr;
+ my_off_t keyroot=info->s->state.key_root[ftb->keynr];
+
+ if (ftb->state != READY)
+ return;
+ ftb->state=INDEX_SEARCH;
+
+ for (i=ftb->queue.elements; i; i--)
+ {
+ ftbw=(FTB_WORD *)(ftb->queue.root[i]);
+
+ r=_mi_search(info, keyinfo, ftbw->word, ftbw->len,
+ SEARCH_FIND | SEARCH_PREFIX, keyroot);
+ if (!r)
+ {
+ r=_mi_compare_text(default_charset_info,
+ info->lastkey+ftbw->trunc,ftbw->len-ftbw->trunc,
+ ftbw->word+ftbw->trunc,ftbw->len-ftbw->trunc,0);
+ }
+ if (r) /* not found */
+ {
+ if (ftbw->yesno>0 && ftbw->up->up==0)
+ { /* this word MUST BE present in every document returned,
+ so we can abort the search right now */
+ ftb->state=INDEX_DONE;
+ return;
+ }
+ }
+ else
+ {
+ memcpy(ftbw->word, info->lastkey, info->lastkey_length);
+ ftbw->docid=info->lastpos;
+ }
+ }
+ queue_fix(& ftb->queue);
+}
+
FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
uint query_len, my_bool presort __attribute__((unused)))
{
@@ -183,7 +199,7 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME))))
return 0;
ftb->please=& _ft_vft_boolean;
- ftb->ok=1;
+ ftb->state=UNINITIALIZED;
ftb->info=info;
ftb->keynr=keynr;
@@ -202,9 +218,63 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
ftbe->docid=HA_POS_ERROR;
ftb->root=ftbe;
_ftb_parse_query(ftb, &query, query+query_len, ftbe, 0, 0);
+ ftb->state=READY;
return ftb;
}
+void _ftb_climb_the_tree(FTB_WORD *ftbw, my_off_t curdoc)
+{
+ FTB_EXPR *ftbe;
+ float weight=ftbw->weight;
+ int yn=ftbw->yesno;
+
+ for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
+ {
+ if (ftbe->docid != curdoc)
+ {
+ ftbe->cur_weight=ftbe->yesses=ftbe->nos=0;
+ ftbe->docid=curdoc;
+ }
+ if (yn>0)
+ {
+ ftbe->cur_weight+=weight;
+ if (++ftbe->yesses >= ftbe->ythresh && !ftbe->nos)
+ {
+ yn=ftbe->yesno;
+ weight=ftbe->cur_weight*ftbe->weight;
+ }
+ else
+ break;
+ }
+ else
+ if (yn<0)
+ {
+ /* NOTE: special sort function of queue assures that all yn<0
+ * events for every particular subexpression will
+ * "auto-magically" happen BEFORE all yn>=0 events. So no
+ * already matched expression can become not-matched again.
+ */
+ ++ftbe->nos;
+ break;
+ }
+ else
+ /* if (yn==0) */
+ {
+ if (ftbe->yesses >= ftbe->ythresh && !ftbe->nos)
+ {
+ yn=ftbe->yesno;
+ ftbe->cur_weight=weight;
+ weight*=ftbe->weight;
+ }
+ else
+ {
+ ftbe->cur_weight+=weight;
+ break;
+ }
+ }
+ }
+}
+
int ft_boolean_read_next(FT_INFO *ftb, char *record)
{
FTB_EXPR *ftbe, *up;
@@ -215,6 +285,9 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
my_off_t curdoc;
int r;
+ if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE)
+ return -1;
+
/* black magic ON */
if ((int) _mi_check_index(info, ftb->keynr) < 0)
return my_errno;
@@ -225,66 +298,21 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
if (!ftb->queue.elements)
return my_errno=HA_ERR_END_OF_FILE;
- while(ftb->ok &&
+ while(ftb->state == INDEX_SEARCH &&
(curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid) != HA_POS_ERROR)
{
while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid)
{
- float weight=ftbw->weight;
- int yn=ftbw->yesno;
- for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
- {
- if (ftbe->docid != curdoc)
- {
- ftbe->cur_weight=ftbe->yesses=ftbe->nos=0;
- ftbe->docid=curdoc;
- }
- if (yn>0)
- {
- ftbe->cur_weight+=weight;
- if (++ftbe->yesses >= ftbe->ythresh && !ftbe->nos)
- {
- yn=ftbe->yesno;
- weight=ftbe->cur_weight*ftbe->weight;
- }
- else
- break;
- }
- else
- if (yn<0)
- {
- /* NOTE: special sort function of queue assures that all yn<0
- * events for every particular subexpression will
- * "auto-magically" happen BEFORE all yn>=0 events. So no
- * already matched expression can become not-matched again.
- */
- ++ftbe->nos;
- break;
- }
- else
- /* if (yn==0) */
- {
- if (ftbe->yesses >= ftbe->ythresh && !ftbe->nos)
- {
- yn=ftbe->yesno;
- ftbe->cur_weight=weight;
- weight*=ftbe->weight;
- }
- else
- {
- ftbe->cur_weight+=weight;
- break;
- }
- }
- }
+ _ftb_climb_the_tree(ftbw, curdoc);
+
/* update queue */
- r=_mi_search(info, keyinfo, ftbw->word, USE_WHOLE_KEY, /*ftbw->len,*/
+ r=_mi_search(info, keyinfo, ftbw->word, USE_WHOLE_KEY,
SEARCH_BIGGER , keyroot);
if (!r)
{
r=_mi_compare_text(default_charset_info,
- info->lastkey+ftbw->trunc,ftbw->len,
- ftbw->word+ftbw->trunc,ftbw->len,0);
+ info->lastkey+ftbw->trunc,ftbw->len-ftbw->trunc,
+ ftbw->word+ftbw->trunc,ftbw->len-ftbw->trunc,0);
}
if (r) /* not found */
{
@@ -292,7 +320,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
if (ftbw->yesno>0 && ftbw->up->up==0)
{ /* this word MUST BE present in every document returned,
so we can stop the search right now */
- ftb->ok=0;
+ ftb->state=INDEX_DONE;
}
}
else
@@ -304,7 +332,8 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
}
ftbe=ftb->root;
- if (ftbe->cur_weight>0 && ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
+ if (ftbe->docid==curdoc && ftbe->cur_weight>0 &&
+ ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
{
/* curdoc matched ! */
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); /* why is this ? */
@@ -321,10 +350,56 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
return my_errno=HA_ERR_END_OF_FILE;
}
-float ft_boolean_find_relevance(FT_INFO *ftb,
- my_off_t docid __attribute__((unused)), byte *record)
+float ft_boolean_find_relevance(FT_INFO *ftb, my_off_t docid, byte *record)
{
- return -1.0; /* to be done via str scan */
+ TREE ptree;
+ FT_WORD word;
+ FTB_WORD *ftbw;
+ FTB_EXPR *ftbe;
+ uint i;
+
+ if (ftb->state == READY)
+ {
+ queue_fix(& ftb->queue);
+ ftb->state=SCAN;
+ }
+ else if (ftb->state != SCAN)
+ return -1.0;
+
+ bzero(&ptree, sizeof(ptree));
+ if (_mi_ft_parse(& ptree, ftb->info, ftb->keynr, record))
+ return -1.0;
+
+ for (i=1; i<=ftb->queue.elements; i++)
+ {
+ ftbw=(FTB_WORD *)(ftb->queue.root[i]);
+ ptree.custom_arg=(void *)(ftbw->trunc);
+ word.pos=ftbw->word+1;
+ word.len=ftbw->len-1;
+ if (tree_search(& ptree, & word))
+ { /* found! */
+ _ftb_climb_the_tree(ftbw, docid);
+ }
+ else
+ { /* not found! */
+ if (ftbw->yesno>0 && ftbw->up->up==0)
+ { /* but this word MUST BE present in every document matched,
+ so we can stop the search right now */
+ break;
+ }
+ }
+ }
+ delete_tree(& ptree);
+ ftbe=ftb->root;
+ if (ftbe->docid==docid && ftbe->cur_weight>0 &&
+ ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
+ { /* row matched ! */
+ return ftbe->cur_weight;
+ }
+ else
+ { /* match failed ! */
+ return 0.0;
+ }
}
void ft_boolean_close_search(FT_INFO *ftb)
@@ -345,6 +420,6 @@ my_off_t ft_boolean_get_docid(FT_INFO *ftb)
void ft_boolean_reinit_search(FT_INFO *ftb)
{
- fprintf(stderr, "ft_boolean_reinit_search called!\n");
+ _ftb_init_index_search(ftb);
}