diff options
Diffstat (limited to 'myisam/ft_parser.c')
-rw-r--r-- | myisam/ft_parser.c | 25 |
1 files changed, 17 insertions, 8 deletions
diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c
index 543cf998a82..a3bd33944aa 100644
--- a/myisam/ft_parser.c
+++ b/myisam/ft_parser.c
@@ -98,6 +98,7 @@ my_bool ft_boolean_check_syntax_string(const byte *str)
  * 1 - word found
  * 2 - left bracket
  * 3 - right bracket
+ * 4 - stopword found
  */
 byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
                  FT_WORD *word, FTB_PARAM *param)
@@ -161,6 +162,11 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
       *start=doc;
       return 1;
     }
+    else if (length)
+    {
+      *start= doc;
+      return 4;
+    }
   }
   if (param->quot)
   {
@@ -170,18 +176,19 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
   return 0;
 }
 
-byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, byte *end,
-                        FT_WORD *word)
+byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end,
+                        FT_WORD *word, my_bool skip_stopwords)
 {
   byte *doc= *start;
   uint mwc, length, mbl;
   DBUG_ENTER("ft_simple_get_word");
 
-  while (doc<end)
+  do
   {
-    for (;doc<end;doc++)
+    for (;; doc++)
     {
-      if (true_word_char(cs,*doc)) break;
+      if (doc >= end) DBUG_RETURN(0);
+      if (true_word_char(cs, *doc)) break;
     }
 
     mwc= length= 0;
@@ -193,13 +200,15 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, byte *end,
 
     word->len= (uint)(doc-word->pos) - mwc;
 
-    if (length >= ft_min_word_len && length < ft_max_word_len &&
-        !is_stopword(word->pos, word->len))
+    if (skip_stopwords == FALSE ||
+        (length >= ft_min_word_len && length < ft_max_word_len &&
+         !is_stopword(word->pos, word->len)))
     {
       *start= doc;
       DBUG_RETURN(1);
     }
   }
+  while (doc < end);
   DBUG_RETURN(0);
 }
 
@@ -217,7 +226,7 @@ int ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc)
   FT_WORD w;
   DBUG_ENTER("ft_parse");
 
-  while (ft_simple_get_word(wtree->custom_arg, &doc,end,&w))
+  while (ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE))
   {
     if (with_alloc)
     {