summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
author: unknown <svoj@poseidon.ndb.mysql.com> 2005-11-06 16:16:18 +0100
committer: unknown <svoj@poseidon.ndb.mysql.com> 2005-11-06 16:16:18 +0100
commit: df33aacd87ff08c27fd371a0bb348fe3986e6f95 (patch)
tree: 0c9f1805d1951780ff34fb797c8fec0193020aab /storage
parent: 2bddb0766473270f43e70bed16f3223255fee4a9 (diff)
parent: 3e5508909f7b7bebabfca5711e5778927279be19 (diff)
download: mariadb-git-df33aacd87ff08c27fd371a0bb348fe3986e6f95.tar.gz
Merge tulin@bk-internal.mysql.com:/home/bk/mysql-5.1-new
into poseidon.ndb.mysql.com:/home/tomas/mysql-5.1-new
Diffstat (limited to 'storage')
-rw-r--r-- storage/myisam/ft_boolean_search.c 438
-rw-r--r-- storage/myisam/ft_nlq_search.c 2
-rw-r--r-- storage/myisam/ft_parser.c 114
-rw-r--r-- storage/myisam/ft_static.c 11
-rw-r--r-- storage/myisam/ft_update.c 4
-rw-r--r-- storage/myisam/ftdefs.h 15
-rw-r--r-- storage/myisam/mi_open.c 1
7 files changed, 408 insertions, 177 deletions
diff --git a/storage/myisam/ft_boolean_search.c b/storage/myisam/ft_boolean_search.c
index 563abf9f0cf..3a149b68e93 100644
--- a/storage/myisam/ft_boolean_search.c
+++ b/storage/myisam/ft_boolean_search.c
@@ -91,6 +91,7 @@ struct st_ftb_expr
float weight;
float cur_weight;
LIST *phrase; /* phrase words */
+ LIST *document; /* for phrase search */
uint yesses; /* number of "yes" words matched */
uint nos; /* number of "no" words matched */
uint ythresh; /* number of "yes" words in expr */
@@ -154,85 +155,160 @@ static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
return i;
}
-static void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
- FTB_EXPR *up, uint depth, byte *up_quot)
+
+typedef struct st_my_ftb_param
{
- byte res;
- FTB_PARAM param;
- FT_WORD w;
- FTB_WORD *ftbw;
- FTB_EXPR *ftbe;
- FTB_EXPR *tmp_expr;
- FT_WORD *phrase_word;
- LIST *phrase_list;
- uint extra=HA_FT_WLEN+ftb->info->s->rec_reflength; /* just a shortcut */
+ FTB *ftb; /* search object being filled in by the query parser callbacks */
+ FTB_EXPR *ftbe; /* expression currently being filled; replaced on '(' and restored to ->up on ')' */
+ byte *up_quot; /* copied from info->quot when an expression opens; non-NULL makes words get recorded as phrase words */
+ uint depth; /* current nesting depth: incremented on '(' and decremented on ')' */
+} MY_FTB_PARAM;
+
+
+static int ftb_query_add_word(void *param, byte *word, uint word_len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *info)
+{ /* Token callback for boolean-query parsing: applies one token (described by info) to the FTB under construction; always returns 0 */
+ MY_FTB_PARAM *ftb_param= (MY_FTB_PARAM *)param;
+ FTB_WORD *ftbw;
+ FTB_EXPR *ftbe, *tmp_expr;
+ FT_WORD *phrase_word;
+ LIST *tmp_element;
+ int r= info->weight_adjust;
+ float weight= (float)
+ (info->wasign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)]; /* r is clamped to [-5, 5] before indexing; wasign selects nwghts over wghts */
+
+ switch (info->type) {
+ case FT_TOKEN_WORD: /* searchable word: build an FTB_WORD and insert it into ftb->queue */
+ ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root,
+ sizeof(FTB_WORD) +
+ (info->trunc ? MI_MAX_KEY_BUFF :
+ word_len * ftb_param->ftb->charset->mbmaxlen +
+ HA_FT_WLEN +
+ ftb_param->ftb->info->s->rec_reflength)); /* trailing room: MI_MAX_KEY_BUFF for truncated words, otherwise sized from word length, mbmaxlen and record reference length */
+ ftbw->len= word_len + 1;
+ ftbw->flags= 0;
+ ftbw->off= 0;
+ if (info->yesno > 0) ftbw->flags|= FTB_FLAG_YES;
+ if (info->yesno < 0) ftbw->flags|= FTB_FLAG_NO;
+ if (info->trunc) ftbw->flags|= FTB_FLAG_TRUNC;
+ ftbw->weight= weight;
+ ftbw->up= ftb_param->ftbe;
+ ftbw->docid[0]= ftbw->docid[1]= HA_OFFSET_ERROR;
+ ftbw->ndepth= (info->yesno < 0) + ftb_param->depth; /* nesting depth, plus one when the word itself is negated */
+ ftbw->key_root= HA_OFFSET_ERROR;
+ memcpy(ftbw->word + 1, word, word_len);
+ ftbw->word[0]= word_len; /* word is stored length-prefixed: byte 0 holds the length */
+ if (info->yesno > 0) ftbw->up->ythresh++;
+ queue_insert(&ftb_param->ftb->queue, (byte *)ftbw);
+ ftb_param->ftb->with_scan|= (info->trunc & FTB_FLAG_TRUNC);
+ for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up) /* climb while the enclosing expressions carry FTB_FLAG_YES */
+ if (! (tmp_expr->flags & FTB_FLAG_YES))
+ break;
+ ftbw->max_docid= &tmp_expr->max_docid; /* the word points at max_docid of the expression where the climb stopped */
+ /* fall through: inside a phrase an indexed word is also recorded as a phrase word */
+ case FT_TOKEN_STOPWORD:
+ if (! ftb_param->up_quot) break; /* outside a quoted phrase nothing more is recorded */
+ phrase_word= (FT_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
+ tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
+ phrase_word->pos= word;
+ phrase_word->len= word_len;
+ tmp_element->data= (void *)phrase_word;
+ ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element);
+ /* Allocate the matching document-list element right here, one per phrase
+ word, so that no allocs/frees are needed for each row later. */
+ tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
+ tmp_element->data= alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
+ ftb_param->ftbe->document=
+ list_add(ftb_param->ftbe->document, tmp_element);
+ break;
+ case FT_TOKEN_LEFT_PAREN: /* open a sub-expression and make it the current one */
+ ftbe=(FTB_EXPR *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_EXPR));
+ ftbe->flags= 0;
+ if (info->yesno > 0) ftbe->flags|= FTB_FLAG_YES;
+ if (info->yesno < 0) ftbe->flags|= FTB_FLAG_NO;
+ ftbe->weight= weight;
+ ftbe->up= ftb_param->ftbe;
+ ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
+ ftbe->docid[0]= ftbe->docid[1]= HA_OFFSET_ERROR;
+ ftbe->phrase= NULL;
+ ftbe->document= 0;
+ if (info->quot) ftb_param->ftb->with_scan|= 2; /* a quoted expression ORs the value 2 into with_scan */
+ if (info->yesno > 0) ftbe->up->ythresh++;
+ ftb_param->ftbe= ftbe;
+ ftb_param->depth++;
+ ftb_param->up_quot= info->quot;
+ break;
+ case FT_TOKEN_RIGHT_PAREN: /* close the current expression and step back to its parent */
+ if (ftb_param->ftbe->document)
+ {
+ /* Close the document list into a ring: last->next= head, head->prev= last */
+ for (tmp_element= ftb_param->ftbe->document;
+ tmp_element->next; tmp_element= tmp_element->next) /* no-op */;
+ tmp_element->next= ftb_param->ftbe->document;
+ ftb_param->ftbe->document->prev= tmp_element;
+ }
+ info->quot= 0; /* clears the quote marker in the parser's boolean info */
+ if (ftb_param->ftbe->up)
+ {
+ DBUG_ASSERT(ftb_param->depth);
+ ftb_param->ftbe= ftb_param->ftbe->up;
+ ftb_param->depth--;
+ ftb_param->up_quot= 0;
+ }
+ break;
+ case FT_TOKEN_EOF:
+ default:
+ break;
+ }
+ return(0);
+}
+
+
+static int ftb_parse_query_internal(void *param, byte *query, uint len)
+{
+ MY_FTB_PARAM *ftb_param= (MY_FTB_PARAM *)param;
+ MYSQL_FTPARSER_BOOLEAN_INFO info;
+ CHARSET_INFO *cs= ftb_param->ftb->charset;
+ byte **start= &query;
+ byte *end= query + len;
+ FT_WORD w;
+
+ info.prev= ' ';
+ info.quot= 0;
+ while (ft_get_word(cs, start, end, &w, &info))
+ ftb_query_add_word(param, w.pos, w.len, &info);
+ return(0);
+}
+
+
+/*
+  Parse a boolean search query into ftb's expression tree and word queue.
+
+  The work is delegated to parser->parse(); the parser reports tokens
+  back through ftb_query_add_word (or tokenizes with the built-in
+  ftb_parse_query_internal via mysql_parse). Does nothing when ftb has
+  already left the UNINITIALIZED state.
+*/
+static void _ftb_parse_query(FTB *ftb, byte *query, uint len,
+ struct st_mysql_ftparser *parser)
+{
+ MYSQL_FTPARSER_PARAM param;
+ MY_FTB_PARAM ftb_param;
+ DBUG_ENTER("_ftb_parse_query");
+ DBUG_ASSERT(parser);
if (ftb->state != UNINITIALIZED)
- return;
+ DBUG_VOID_RETURN; /* every exit after DBUG_ENTER must go through a DBUG return macro */
- param.prev=' ';
- param.quot= up_quot;
- while ((res=ft_get_word(ftb->charset,start,end,&w,&param)))
- {
- int r=param.plusminus;
- float weight= (float) (param.pmsign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)];
- switch (res) {
- case 1: /* word found */
- ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root,
- sizeof(FTB_WORD) +
- (param.trunc ? MI_MAX_KEY_BUFF :
- w.len*ftb->charset->mbmaxlen+extra));
- ftbw->len=w.len+1;
- ftbw->flags=0;
- ftbw->off=0;
- if (param.yesno>0) ftbw->flags|=FTB_FLAG_YES;
- if (param.yesno<0) ftbw->flags|=FTB_FLAG_NO;
- if (param.trunc) ftbw->flags|=FTB_FLAG_TRUNC;
- ftbw->weight=weight;
- ftbw->up=up;
- ftbw->docid[0]=ftbw->docid[1]=HA_OFFSET_ERROR;
- ftbw->ndepth= (param.yesno<0) + depth;
- ftbw->key_root=HA_OFFSET_ERROR;
- memcpy(ftbw->word+1, w.pos, w.len);
- ftbw->word[0]=w.len;
- if (param.yesno > 0) up->ythresh++;
- queue_insert(& ftb->queue, (byte *)ftbw);
- ftb->with_scan|=(param.trunc & FTB_FLAG_TRUNC);
- for (tmp_expr= up; tmp_expr->up; tmp_expr= tmp_expr->up)
- if (! (tmp_expr->flags & FTB_FLAG_YES))
- break;
- ftbw->max_docid= &tmp_expr->max_docid;
- case 4: /* not indexed word (stopword or too short/long) */
- if (! up_quot) break;
- phrase_word= (FT_WORD *)alloc_root(&ftb->mem_root, sizeof(FT_WORD));
- phrase_list= (LIST *)alloc_root(&ftb->mem_root, sizeof(LIST));
- phrase_word->pos= w.pos;
- phrase_word->len= w.len;
- phrase_list->data= (void *)phrase_word;
- up->phrase= list_add(up->phrase, phrase_list);
- break;
- case 2: /* left bracket */
- ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR));
- ftbe->flags=0;
- if (param.yesno>0) ftbe->flags|=FTB_FLAG_YES;
- if (param.yesno<0) ftbe->flags|=FTB_FLAG_NO;
- ftbe->weight=weight;
- ftbe->up=up;
- ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
- ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR;
- ftbe->phrase= NULL;
- if (param.quot) ftb->with_scan|=2;
- if (param.yesno > 0) up->ythresh++;
- _ftb_parse_query(ftb, start, end, ftbe, depth+1, param.quot);
- param.quot=0;
- break;
- case 3: /* right bracket */
- if (up_quot) up->phrase= list_reverse(up->phrase);
- return;
- }
- }
- return;
+ ftb_param.ftb= ftb;
+ ftb_param.depth= 0;
+ ftb_param.ftbe= ftb->root; /* parsing starts at the root expression */
+ ftb_param.up_quot= 0;
+
+ param.mysql_parse= ftb_parse_query_internal;
+ param.mysql_add_word= ftb_query_add_word;
+ param.ftparser_state= 0;
+ param.mysql_ftparam= (void *)&ftb_param;
+ param.cs= ftb->charset;
+ param.doc= query;
+ param.length= len;
+ param.mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO;
+ parser->parse(&param);
+ DBUG_VOID_RETURN;
}
+
static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)),
const void *a,const void *b)
@@ -463,8 +539,11 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR;
ftbe->phrase= NULL;
+ ftbe->document= 0;
ftb->root=ftbe;
- _ftb_parse_query(ftb, &query, query+query_len, ftbe, 0, NULL);
+ _ftb_parse_query(ftb, query, query_len, keynr == NO_SUCH_KEY ?
+ &ft_default_parser :
+ info->s->keyinfo[keynr].parser);
ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
sizeof(FTB_WORD *)*ftb->queue.elements);
memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements);
@@ -480,6 +559,62 @@ err:
}
+typedef struct st_my_ftb_phrase_param
+{
+ LIST *phrase; /* phrase words to look for (taken from ftbe->phrase) */
+ LIST *document; /* current position in the circular list of document-word slots (taken from ftbe->document) */
+ CHARSET_INFO *cs; /* collation passed to my_strnncoll when comparing words */
+ uint phrase_length; /* number of elements in the phrase list */
+ uint document_length; /* counter of document words seen, initialised to 1 by _ftb_check_phrase */
+ uint match; /* incremented each time the whole phrase matched */
+} MY_FTB_PHRASE_PARAM;
+
+
+static int ftb_phrase_add_word(void *param, byte *word, uint word_len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
+{ /* Word callback for phrase checking: stores the word in the next document slot and, once enough words were seen, compares the slots against the phrase; always returns 0, a hit is signalled through phrase_param->match */
+ MY_FTB_PHRASE_PARAM *phrase_param= (MY_FTB_PHRASE_PARAM *)param;
+ FT_WORD *w= (FT_WORD *)phrase_param->document->data;
+ LIST *phrase, *document;
+ w->pos= word; /* only pointer and length are kept; the word bytes are not copied */
+ w->len= word_len;
+ phrase_param->document= phrase_param->document->prev; /* step the ring backwards to the slot for the next word */
+ if (phrase_param->phrase_length > phrase_param->document_length)
+ {
+ phrase_param->document_length++; /* fewer words than the phrase holds so far: just count, nothing to compare yet */
+ return 0;
+ }
+ /* TODO: rewrite phrase search to avoid
+ comparing the same word twice. */
+ for (phrase= phrase_param->phrase, document= phrase_param->document->next;
+ phrase; phrase= phrase->next, document= document->next) /* walk phrase list and document ring in step, starting at the slot just written */
+ {
+ FT_WORD *phrase_word= (FT_WORD *)phrase->data;
+ FT_WORD *document_word= (FT_WORD *)document->data;
+ if (my_strnncoll(phrase_param->cs, phrase_word->pos, phrase_word->len,
+ document_word->pos, document_word->len))
+ return 0; /* first differing word ends the comparison without a match */
+ }
+ phrase_param->match++;
+ return 0;
+}
+
+
+/*
+  mysql_parse callback used for phrase checking: cuts the document buffer
+  into words with ft_simple_get_word() and hands each one to
+  ftb_phrase_add_word(). Returns 1 as soon as a match has been counted,
+  0 when the buffer runs out without one.
+*/
+static int ftb_check_phrase_internal(void *param, byte *document, uint len)
+{
+  MY_FTB_PHRASE_PARAM *state= (MY_FTB_PHRASE_PARAM *)param;
+  const byte *limit= document + len;
+  FT_WORD token;
+
+  for (;;)
+  {
+    if (!ft_simple_get_word(state->cs, &document, limit, &token, FALSE))
+      break;
+    ftb_phrase_add_word(param, token.pos, token.len, 0);
+    if (state->match)
+      return 1;
+  }
+  return 0;
+}
+
+
/*
Checks if given buffer matches phrase list.
@@ -494,32 +629,31 @@ err:
1 is returned if phrase found, 0 else.
*/
-static int _ftb_check_phrase(const byte *s0, const byte *e0,
- LIST *phrase, CHARSET_INFO *cs)
+static int _ftb_check_phrase(const byte *document, uint len,
+ FTB_EXPR *ftbe, CHARSET_INFO *cs,
+ struct st_mysql_ftparser *parser)
{
- FT_WORD h_word;
- const byte *h_start= s0;
- DBUG_ENTER("_ftb_strstr");
- DBUG_ASSERT(phrase);
-
- while (ft_simple_get_word(cs, (byte **)&h_start, e0, &h_word, FALSE))
- {
- FT_WORD *n_word;
- LIST *phrase_element= phrase;
- const byte *h_start1= h_start;
- for (;;)
- {
- n_word= (FT_WORD *)phrase_element->data;
- if (my_strnncoll(cs, (const uchar *) h_word.pos, h_word.len,
- (const uchar *) n_word->pos, n_word->len))
- break;
- if (! (phrase_element= phrase_element->next))
- DBUG_RETURN(1);
- if (! ft_simple_get_word(cs, (byte **)&h_start1, e0, &h_word, FALSE))
- DBUG_RETURN(0);
- }
- }
- DBUG_RETURN(0);
+ MY_FTB_PHRASE_PARAM ftb_param; /* matching state shared with the word callback */
+ MYSQL_FTPARSER_PARAM param; /* request block handed to the parser */
+ DBUG_ENTER("_ftb_check_phrase");
+ DBUG_ASSERT(parser);
+ ftb_param.phrase= ftbe->phrase;
+ ftb_param.document= ftbe->document; /* word slots allocated while the query was parsed */
+ ftb_param.cs= cs;
+ ftb_param.phrase_length= list_length(ftbe->phrase);
+ ftb_param.document_length= 1; /* counter starts at 1; ftb_phrase_add_word increments it until it reaches phrase_length */
+ ftb_param.match= 0;
+
+ param.mysql_parse= ftb_check_phrase_internal;
+ param.mysql_add_word= ftb_phrase_add_word;
+ param.ftparser_state= 0;
+ param.mysql_ftparam= (void *)&ftb_param;
+ param.cs= cs;
+ param.doc= (byte *)document;
+ param.length= len;
+ param.mode= MYSQL_FTPARSER_WITH_STOPWORDS; /* NOTE(review): presumably so that stopwords inside the phrase are delivered too - confirm against plugin.h */
+ parser->parse(&param); /* the parser's own return value is ignored; only ftb_param.match decides */
+ DBUG_RETURN(ftb_param.match ? 1 : 0);
}
@@ -530,6 +664,9 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_
float weight=ftbw->weight;
int yn=ftbw->flags, ythresh, mode=(ftsi_orig != 0);
my_off_t curdoc=ftbw->docid[mode];
+ struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
+ &ft_default_parser :
+ ftb->info->s->keyinfo[ftb->keynr].parser;
for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
{
@@ -559,8 +696,8 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_
{
if (!ftsi.pos)
continue;
- not_found = ! _ftb_check_phrase(ftsi.pos, ftsi.pos+ftsi.len,
- ftbe->phrase, ftb->charset);
+ not_found = ! _ftb_check_phrase(ftsi.pos, ftsi.len,
+ ftbe, ftb->charset, parser);
}
if (not_found) break;
} /* ftbe->quot */
@@ -667,14 +804,67 @@ err:
}
-float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
+typedef struct st_my_ftb_find_param
{
- FT_WORD word;
+ FT_INFO *ftb; /* search object whose word list is probed for each document word */
+ FT_SEG_ITERATOR *ftsi; /* segment iterator passed on to _ftb_climb_the_tree */
+} MY_FTB_FIND_PARAM;
+
+
+static int ftb_find_relevance_add_word(void *param, byte *word, uint len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
+{ /* Word callback for relevance calculation: finds the query words equal to this document word and climbs the expression tree for each; always returns 0 */
+ MY_FTB_FIND_PARAM *ftb_param= (MY_FTB_FIND_PARAM *)param;
+ FT_INFO *ftb= ftb_param->ftb;
FTB_WORD *ftbw;
+ int a, b, c;
+ for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2) /* binary search over ftb->list; ends with c at the midpoint of the last [a, b) interval */
+ {
+ ftbw= ftb->list[c];
+ if (mi_compare_text(ftb->charset, (uchar*)word, len,
+ (uchar*)ftbw->word+1, ftbw->len-1,
+ (my_bool)(ftbw->flags&FTB_FLAG_TRUNC), 0) > 0)
+ b= c;
+ else
+ a= c;
+ }
+ for (; c >= 0; c--) /* scan downwards from c for as long as the entries compare equal to the word */
+ {
+ ftbw= ftb->list[c];
+ if (mi_compare_text(ftb->charset, (uchar*)word, len,
+ (uchar*)ftbw->word + 1,ftbw->len - 1,
+ (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0))
+ break;
+ if (ftbw->docid[1] == ftb->info->lastpos)
+ continue; /* this query word was already handled for the current record */
+ ftbw->docid[1]= ftb->info->lastpos;
+ _ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi);
+ }
+ return(0);
+}
+
+
+/*
+  mysql_parse callback used for relevance calculation: splits the buffer
+  into words with ft_simple_get_word() and passes every word to
+  ftb_find_relevance_add_word(). Always returns 0.
+*/
+static int ftb_find_relevance_parse(void *param, byte *doc, uint len)
+{
+  MY_FTB_FIND_PARAM *find_param= (MY_FTB_FIND_PARAM *)param;
+  FT_INFO *ftb= find_param->ftb;
+  byte *doc_end= doc + len;
+  FT_WORD token;
+
+  for (;;)
+  {
+    if (!ft_simple_get_word(ftb->charset, &doc, doc_end, &token, TRUE))
+      break;
+    ftb_find_relevance_add_word(param, token.pos, token.len, 0);
+  }
+  return(0);
+}
+
+
+float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
+{
FTB_EXPR *ftbe;
FT_SEG_ITERATOR ftsi, ftsi2;
- const byte *end;
my_off_t docid=ftb->info->lastpos;
+ MY_FTB_FIND_PARAM ftb_param;
+ MYSQL_FTPARSER_PARAM param;
+ struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
+ &ft_default_parser :
+ ftb->info->s->keyinfo[ftb->keynr].parser;
if (docid == HA_OFFSET_ERROR)
return -2.0;
@@ -702,41 +892,23 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
_mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi);
memcpy(&ftsi2, &ftsi, sizeof(ftsi));
+ ftb_param.ftb= ftb;
+ ftb_param.ftsi= &ftsi2;
+ param.mysql_parse= ftb_find_relevance_parse;
+ param.mysql_add_word= ftb_find_relevance_add_word;
+ param.ftparser_state= 0;
+ param.mysql_ftparam= (void *)&ftb_param;
+ param.cs= ftb->charset;
+ param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
while (_mi_ft_segiterator(&ftsi))
{
if (!ftsi.pos)
continue;
- end=ftsi.pos+ftsi.len;
- while (ft_simple_get_word(ftb->charset, (byte **) &ftsi.pos,
- (byte *) end, &word, TRUE))
- {
- int a, b, c;
- for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2)
- {
- ftbw=ftb->list[c];
- if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len,
- (uchar*) ftbw->word+1, ftbw->len-1,
- (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0) >0)
- b=c;
- else
- a=c;
- }
- for (; c>=0; c--)
- {
- ftbw=ftb->list[c];
- if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len,
- (uchar*) ftbw->word+1,ftbw->len-1,
- (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0))
- break;
- if (ftbw->docid[1] == docid)
- continue;
- ftbw->docid[1]=docid;
- _ftb_climb_the_tree(ftb, ftbw, &ftsi2);
- }
- }
+ param.doc= (byte *)ftsi.pos;
+ param.length= ftsi.len;
+ parser->parse(&param);
}
-
ftbe=ftb->root;
if (ftbe->docid[1]==docid && ftbe->cur_weight>0 &&
ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
diff --git a/storage/myisam/ft_nlq_search.c b/storage/myisam/ft_nlq_search.c
index 8460db61a36..82857100d23 100644
--- a/storage/myisam/ft_nlq_search.c
+++ b/storage/myisam/ft_nlq_search.c
@@ -230,7 +230,7 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
NULL, NULL);
ft_parse_init(&wtree, aio.charset);
- if (ft_parse(&wtree,query,query_len,0))
+ if (ft_parse(&wtree, query, query_len, 0, info->s->keyinfo[keynr].parser))
goto err;
if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
diff --git a/storage/myisam/ft_parser.c b/storage/myisam/ft_parser.c
index 2fad2363ae2..f21240bbfd9 100644
--- a/storage/myisam/ft_parser.c
+++ b/storage/myisam/ft_parser.c
@@ -24,6 +24,14 @@ typedef struct st_ft_docstat {
double sum;
} FT_DOCSTAT;
+
+typedef struct st_my_ft_parser_param
+{
+ TREE *wtree; /* tree that receives every parsed word through tree_insert */
+ my_bool with_alloc; /* when set, word bytes are copied into wtree->mem_root before insertion */
+} MY_FT_PARSER_PARAM;
+
+
static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2)
{
return mi_compare_text(cs, (uchar*) w1->pos, w1->len,
@@ -102,13 +110,14 @@ my_bool ft_boolean_check_syntax_string(const byte *str)
4 - stopword found
*/
byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
- FT_WORD *word, FTB_PARAM *param)
+ FT_WORD *word, MYSQL_FTPARSER_BOOLEAN_INFO *param)
{
byte *doc=*start;
uint mwc, length, mbl;
param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
- param->plusminus=param->pmsign=0;
+ param->weight_adjust= param->wasign= 0;
+ param->type= FT_TOKEN_EOF;
while (doc<end)
{
@@ -119,7 +128,8 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
{
param->quot=doc;
*start=doc+1;
- return 3; /* FTB_RBR */
+ param->type= FT_TOKEN_RIGHT_PAREN;
+ goto ret;
}
if (!param->quot)
{
@@ -128,21 +138,22 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
/* param->prev=' '; */
*start=doc+1;
if (*doc == FTB_LQUOT) param->quot=*start;
- return (*doc == FTB_RBR)+2;
+ param->type= (*doc == FTB_RBR ? FT_TOKEN_RIGHT_PAREN : FT_TOKEN_LEFT_PAREN);
+ goto ret;
}
if (param->prev == ' ')
{
if (*doc == FTB_YES ) { param->yesno=+1; continue; } else
if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else
if (*doc == FTB_NO ) { param->yesno=-1; continue; } else
- if (*doc == FTB_INC ) { param->plusminus++; continue; } else
- if (*doc == FTB_DEC ) { param->plusminus--; continue; } else
- if (*doc == FTB_NEG ) { param->pmsign=!param->pmsign; continue; }
+ if (*doc == FTB_INC ) { param->weight_adjust++; continue; } else
+ if (*doc == FTB_DEC ) { param->weight_adjust--; continue; } else
+ if (*doc == FTB_NEG ) { param->wasign= !param->wasign; continue; }
}
}
param->prev=*doc;
param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
- param->plusminus=param->pmsign=0;
+ param->weight_adjust= param->wasign= 0;
}
mwc=length=0;
@@ -161,20 +172,24 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
|| param->trunc) && length < ft_max_word_len)
{
*start=doc;
- return 1;
+ param->type= FT_TOKEN_WORD;
+ goto ret;
}
else if (length) /* make sure length > 0 (if start contains spaces only) */
{
*start= doc;
- return 4;
+ param->type= FT_TOKEN_STOPWORD;
+ goto ret;
}
}
if (param->quot)
{
param->quot=*start=doc;
- return 3; /* FTB_RBR */
+ param->type= FT_TOKEN_RIGHT_PAREN; /* named token instead of the magic 3, as on the other right-paren exit of this function */
+ goto ret;
}
- return 0;
+ret:
+ return param->type;
}
byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end,
@@ -220,30 +235,67 @@ void ft_parse_init(TREE *wtree, CHARSET_INFO *cs)
DBUG_VOID_RETURN;
}
-int ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc)
+
+static int ft_add_word(void *param, byte *word, uint word_len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
{
- byte *end=doc+doclen;
+ TREE *wtree; /* destination tree, taken from the MY_FT_PARSER_PARAM passed as param */
FT_WORD w;
- DBUG_ENTER("ft_parse");
-
- while (ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE))
+ DBUG_ENTER("ft_add_word");
+ wtree= ((MY_FT_PARSER_PARAM *)param)->wtree;
+ if (((MY_FT_PARSER_PARAM *)param)->with_alloc)
{
- if (with_alloc)
- {
- byte *ptr;
- /* allocating the data in the tree - to avoid mallocs and frees */
- DBUG_ASSERT(wtree->with_delete==0);
- ptr=(byte *)alloc_root(& wtree->mem_root,w.len);
- memcpy(ptr, w.pos, w.len);
- w.pos=ptr;
- }
- if (!tree_insert(wtree, &w, 0, wtree->custom_arg))
- goto err;
+ byte *ptr;
+ /* allocating the data in the tree - to avoid mallocs and frees */
+ DBUG_ASSERT(wtree->with_delete == 0);
+ ptr= (byte *)alloc_root(&wtree->mem_root, word_len); /* NOTE(review): result is used unchecked by the memcpy below - confirm alloc_root cannot return NULL here */
+ memcpy(ptr, word, word_len);
+ w.pos= ptr;
+ }
+ else
+ w.pos= word; /* no copy: the tree entry points into the caller's buffer */
+ w.len= word_len;
+ if (!tree_insert(wtree, &w, 0, wtree->custom_arg))
+ {
+ delete_tree(wtree); /* a failed insert discards the whole tree and reports 1 */
+ DBUG_RETURN(1);
}
DBUG_RETURN(0);
+}
-err:
- delete_tree(wtree);
- DBUG_RETURN(1);
+
+static int ft_parse_internal(void *param, byte *doc, uint doc_len)
+{ /* mysql_parse callback installed by ft_parse: splits doc into words and adds each via ft_add_word; returns 1 on the first failed add, else 0 */
+ byte *end=doc+doc_len;
FT_WORD w;
+ TREE *wtree;
+ DBUG_ENTER("ft_parse_internal");
+
+ wtree= ((MY_FT_PARSER_PARAM *)param)->wtree;
+ while (ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE)) /* wtree->custom_arg is passed as the charset argument */
+ if (ft_add_word(param, w.pos, w.len, 0))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
}
+
+int ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc,
+ struct st_mysql_ftparser *parser)
+{ /* Runs parser->parse() over doc so that the words end up in wtree; returns whatever the parser returns */
+ MYSQL_FTPARSER_PARAM param;
+ MY_FT_PARSER_PARAM my_param;
+ DBUG_ENTER("ft_parse");
+ DBUG_ASSERT(parser);
+ my_param.wtree= wtree;
+ my_param.with_alloc= with_alloc;
+
+ param.mysql_parse= ft_parse_internal; /* built-in tokenizer offered to the parser */
+ param.mysql_add_word= ft_add_word; /* per-word sink offered to the parser */
+ param.ftparser_state= 0;
+ param.mysql_ftparam= &my_param; /* opaque pointer that comes back as the callbacks' first argument */
+ param.cs= wtree->custom_arg;
+ param.doc= doc;
+ param.length= doclen;
+ param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
+ DBUG_RETURN(parser->parse(&param));
+}
diff --git a/storage/myisam/ft_static.c b/storage/myisam/ft_static.c
index e221950f445..6cfb0d59e62 100644
--- a/storage/myisam/ft_static.c
+++ b/storage/myisam/ft_static.c
@@ -626,3 +626,14 @@ const char *ft_precompiled_stopwords[] = {
#endif
NULL };
+
+static int ft_default_parser_parse(MYSQL_FTPARSER_PARAM *param)
+{ /* Default parse routine: forwards the whole document to the caller-supplied mysql_parse callback */
+ return param->mysql_parse(param->mysql_ftparam, param->doc, param->length);
+}
+
+struct st_mysql_ftparser ft_default_parser= /* parser descriptor used when no other parser applies (e.g. keynr == NO_SUCH_KEY in ft_init_boolean_search) */
+{
+ MYSQL_FTPARSER_INTERFACE_VERSION, ft_default_parser_parse, 0, 0 /* NOTE(review): the two trailing zeros are presumably unused optional hooks - confirm against struct st_mysql_ftparser */
+};
+
diff --git a/storage/myisam/ft_update.c b/storage/myisam/ft_update.c
index b8cd925bf4f..623419bd701 100644
--- a/storage/myisam/ft_update.c
+++ b/storage/myisam/ft_update.c
@@ -99,15 +99,17 @@ uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr,
const byte *record, my_bool with_alloc)
{
FT_SEG_ITERATOR ftsi;
+ struct st_mysql_ftparser *parser;
DBUG_ENTER("_mi_ft_parse");
_mi_ft_segiterator_init(info, keynr, record, &ftsi);
ft_parse_init(parsed, info->s->keyinfo[keynr].seg->charset);
+ parser= info->s->keyinfo[keynr].parser;
while (_mi_ft_segiterator(&ftsi))
{
if (ftsi.pos)
- if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len, with_alloc))
+ if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len, with_alloc, parser))
DBUG_RETURN(1);
}
DBUG_RETURN(0);
diff --git a/storage/myisam/ftdefs.h b/storage/myisam/ftdefs.h
index 91c679a1e58..11a283e0eb3 100644
--- a/storage/myisam/ftdefs.h
+++ b/storage/myisam/ftdefs.h
@@ -22,6 +22,7 @@
#include <m_ctype.h>
#include <my_tree.h>
#include <queues.h>
+#include <plugin.h>
#define true_word_char(s,X) (my_isalnum(s,X) || (X)=='_')
#define misc_word_char(X) ((X)=='\'')
@@ -98,20 +99,12 @@ typedef struct st_ft_word {
double weight;
} FT_WORD;
-typedef struct st_ftb_param {
- byte prev;
- int yesno;
- int plusminus;
- bool pmsign;
- bool trunc;
- byte *quot;
-} FTB_PARAM;
-
int is_stopword(char *word, uint len);
uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t);
-byte ft_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *, FTB_PARAM *);
+byte ft_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *,
+ MYSQL_FTPARSER_BOOLEAN_INFO *);
byte ft_simple_get_word(CHARSET_INFO *, byte **, const byte *,
FT_WORD *, my_bool);
@@ -126,7 +119,7 @@ void _mi_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *);
uint _mi_ft_segiterator(FT_SEG_ITERATOR *);
void ft_parse_init(TREE *, CHARSET_INFO *);
-int ft_parse(TREE *, byte *, int, my_bool);
+int ft_parse(TREE *, byte *, int, my_bool, struct st_mysql_ftparser *parser);
FT_WORD * ft_linearize(TREE *);
FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, const byte *);
uint _mi_ft_parse(TREE *, MI_INFO *, uint, const byte *, my_bool);
diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c
index 955d55cf765..91136bbeef5 100644
--- a/storage/myisam/mi_open.c
+++ b/storage/myisam/mi_open.c
@@ -1047,6 +1047,7 @@ char *mi_keydef_read(char *ptr, MI_KEYDEF *keydef)
keydef->block_size = keydef->block_length/MI_MIN_KEY_BLOCK_LENGTH-1;
keydef->underflow_block_length=keydef->block_length/3;
keydef->version = 0; /* Not saved */
+ keydef->parser = &ft_default_parser;
return ptr;
}