diff options
-rw-r--r-- | Docs/manual.texi | 16 | ||||
-rw-r--r-- | include/ft_global.h | 2 | ||||
-rw-r--r-- | include/my_base.h | 1 | ||||
-rw-r--r-- | myisam/ft_boolean_search.c | 22 | ||||
-rw-r--r-- | myisam/ft_dump.c | 12 | ||||
-rw-r--r-- | myisam/ft_nlq_search.c | 12 | ||||
-rw-r--r-- | myisam/ft_parser.c | 6 | ||||
-rw-r--r-- | myisam/ft_update.c | 6 | ||||
-rw-r--r-- | myisam/ftdefs.h | 6 | ||||
-rw-r--r-- | mysql-test/t/fulltext.test | 13 | ||||
-rw-r--r-- | sql/item_func.cc | 104 | ||||
-rw-r--r-- | sql/item_func.h | 27 | ||||
-rw-r--r-- | sql/sql_select.cc | 4 |
13 files changed, 113 insertions, 118 deletions
diff --git a/Docs/manual.texi b/Docs/manual.texi index 73e1449fcb4..51f1b7b66d8 100644 --- a/Docs/manual.texi +++ b/Docs/manual.texi @@ -29167,8 +29167,6 @@ mysql> select STRCMP('text', 'text'); relevance - similarity measure between the text in columns @code{(col1,col2,...)} and the query @code{expr}. Relevance is a positive floating-point number. Zero relevance means no similarity. -For @code{MATCH ... AGAINST()} to work, a @strong{FULLTEXT} index -must be created first. @xref{CREATE TABLE, , @code{CREATE TABLE}}. @code{MATCH ... AGAINST()} is available in MySQL version 3.23.23 or later. @code{IN BOOLEAN MODE} extension was added in version 4.0.1. For details and usage examples @pxref{Fulltext Search}. @@ -33828,9 +33826,10 @@ mysql> SELECT * FROM articles WHERE MATCH (title,body) AGAINST ( This query retrieved all the rows that contain the word @code{MySQL} (note: 50% threshold is gone), but does @strong{not} contain the word -@code{YourSQL}. Note that it does not auto-magically sort rows in +@code{YourSQL}. Note, that it does not auto-magically sort rows in derceasing relevance order (the last row has the highest relevance, -as it contains @code{MySQL} twice). +as it contains @code{MySQL} twice). Boolean fulltext search can also +work even without @code{FULLTEXT} index, but it would be @strong{slow}. Boolean fulltext search supports the following operators: @@ -33890,10 +33889,12 @@ order), but rank ``gates to hell'' higher than ``bill gates''. @itemize @bullet @item All parameters to the @code{MATCH} function must be columns from the -same table that is part of the same fulltext index. +same table that is part of the same fulltext index, unless this +@code{MATCH} is @code{IN BOOLEAN MODE}. @item Column list between @code{MATCH} and @code{AGAINST} must match exactly -a column list in the @code{FULLTEXT} index definition. +a column list in the @code{FULLTEXT} index definition, unless this +@code{MATCH} is @code{IN BOOLEAN MODE}. @item The argument to @code{AGAINST} must be a constant string. @end itemize @@ -45853,6 +45854,9 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}. @itemize @bullet @item +@code{MATCH ... AGAINST(... IN BOOLEAN MODE)} can now work +without @code{FULLTEXT} index. +@item Added @file{myisam/ft_dump} utility for low-level inspection of @code{FULLTEXT} indexes. @item diff --git a/include/ft_global.h b/include/ft_global.h index 52fb8d38a9a..064dd7a6538 100644 --- a/include/ft_global.h +++ b/include/ft_global.h @@ -32,7 +32,7 @@ extern "C" { typedef struct st_ft_info FT_INFO; struct _ft_vft { int (*read_next)(FT_INFO *, char *); - float (*find_relevance)(FT_INFO *, my_off_t, byte *); + float (*find_relevance)(FT_INFO *, byte *, uint); void (*close_search)(FT_INFO *); float (*get_relevance)(FT_INFO *); void (*reinit_search)(FT_INFO *); diff --git a/include/my_base.h b/include/my_base.h index adb2366f454..abd2ac602e5 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -226,6 +226,7 @@ enum ha_base_keytype { /* Other constants */ #define HA_NAMELEN 64 /* Max length of saved filename */ +#define NO_SUCH_KEY ((uint)~0) /* used as a key no. */ /* Intern constants in databases */ diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index 70ba7fc9df2..0055842c24b 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -152,13 +152,16 @@ void _ftb_init_index_search(FT_INFO *ftb) int i, r; FTB_WORD *ftbw; MI_INFO *info=ftb->info; - MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr; - my_off_t keyroot=info->s->state.key_root[ftb->keynr]; + MI_KEYDEF *keyinfo; + my_off_t keyroot; - if (ftb->state != READY) + if (ftb->state != READY || ftb->keynr == NO_SUCH_KEY) return; ftb->state=INDEX_SEARCH; + keyinfo=info->s->keyinfo+ftb->keynr; + keyroot=info->s->state.key_root[ftb->keynr]; + for (i=ftb->queue.elements; i; i--) { ftbw=(FTB_WORD *)(ftb->queue.root[i]); @@ -352,14 +355,17 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) return my_errno=HA_ERR_END_OF_FILE; } -float ft_boolean_find_relevance(FT_INFO *ftb, my_off_t docid, byte *record) +float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) { TREE ptree; FT_WORD word; FTB_WORD *ftbw; FTB_EXPR *ftbe; uint i; + my_off_t docid=ftb->info->lastpos; + if (docid == HA_POS_ERROR) + return -2.0; if (ftb->state == READY || ftb->state == INDEX_DONE) { for (i=1; i<=ftb->queue.elements; i++) @@ -382,11 +388,13 @@ float ft_boolean_find_relevance(FT_INFO *ftb, my_off_t docid, byte *record) ftb->state=SCAN; } else if (ftb->state != SCAN) - return -2.0; + return -3.0; bzero(&ptree, sizeof(ptree)); - if (_mi_ft_parse(& ptree, ftb->info, ftb->keynr, record)) - return -3.0; + if ((ftb->keynr==NO_SUCH_KEY) + ? ft_parse(& ptree, record, length) + : _mi_ft_parse(& ptree, ftb->info, ftb->keynr, record)) + return -4.0; for (i=1; i<=ftb->queue.elements; i++) { diff --git a/myisam/ft_dump.c b/myisam/ft_dump.c index 2c85669ff0e..940164f89c5 100644 --- a/myisam/ft_dump.c +++ b/myisam/ft_dump.c @@ -159,7 +159,7 @@ err: return 0; } -const char *options="dscve:h"; +const char *options="dscvh"; static void get_options(int argc, char *argv[]) { @@ -184,7 +184,15 @@ static void get_options(int argc, char *argv[]) static void usage(char *argv[]) { - printf("Use: %s [-%s] <table_name> <key_no>\n", *argv, options); + printf(" +Use: %s [-%s] <table_name> <index_no> + +-d dump index (incl. data offsets and word weights) +-s report global stats +-c calculate per-word stats (counts and global weights) +-v be verbose +-h this text\n +", *argv, options); exit(1); } diff --git a/myisam/ft_nlq_search.c b/myisam/ft_nlq_search.c index 2a55ff839ca..5bb2ffab939 100644 --- a/myisam/ft_nlq_search.c +++ b/myisam/ft_nlq_search.c @@ -169,7 +169,7 @@ static int FT_DOC_cmp(FT_DOC *a, FT_DOC *b) FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, uint query_len, my_bool presort) { - TREE *wtree, allocated_wtree; + TREE allocated_wtree, *wtree=&allocated_wtree; ALL_IN_ONE aio; FT_DOC *dptr; FT_INFO *dlist=NULL; @@ -193,7 +193,7 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0, NULL, NULL); - if(!(wtree=ft_parse(&allocated_wtree,query,query_len))) + if(ft_parse(&allocated_wtree,query,query_len)) goto err; if(tree_walk(wtree, (tree_walk_action)&walk_and_match, &aio, @@ -247,11 +247,15 @@ int ft_nlq_read_next(FT_INFO *handler, char *record) return my_errno; } -float ft_nlq_find_relevance(FT_INFO *handler, my_off_t docid, - byte *record __attribute__((unused))) +float ft_nlq_find_relevance(FT_INFO *handler, + byte *record __attribute__((unused)), uint length __attribute__((unused))) { int a,b,c; FT_DOC *docs=handler->doc; + my_off_t docid=handler->info->lastpos; + + if (docid == HA_POS_ERROR) + return -5.0; /* Assuming docs[] is sorted by dpos... */ diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c index 35e5959b556..0d1495da548 100644 --- a/myisam/ft_parser.c +++ b/myisam/ft_parser.c @@ -206,7 +206,7 @@ byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word) return 0; } -TREE * ft_parse(TREE *wtree, byte *doc, int doclen) +int ft_parse(TREE *wtree, byte *doc, int doclen) { byte *end=doc+doclen; FT_WORD w; @@ -221,10 +221,10 @@ TREE * ft_parse(TREE *wtree, byte *doc, int doclen) if (!tree_insert(wtree, &w, 0)) goto err; } - return wtree; + return 0; err: delete_tree(wtree); - return NULL; + return 1; } diff --git a/myisam/ft_update.c b/myisam/ft_update.c index 89da06e673f..1bc0ace6c77 100644 --- a/myisam/ft_update.c +++ b/myisam/ft_update.c @@ -28,7 +28,7 @@ /**************************************************************/ -/* parses a document i.e. calls _mi_ft_parse for every keyseg */ +/* parses a document i.e. calls ft_parse for every keyseg */ uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record) { byte *pos; @@ -57,11 +57,11 @@ uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record) } else len=keyseg->length; - if (!(ft_parse(parsed, pos, len))) + if (ft_parse(parsed, pos, len)) return 1; } /* Handle the case where all columns are NULL */ - if (!is_tree_inited(parsed) && !(ft_parse(parsed, (byte*) "", 0))) + if (!is_tree_inited(parsed) && ft_parse(parsed, (byte*) "", 0)) return 1; else return 0; diff --git a/myisam/ftdefs.h b/myisam/ftdefs.h index fd3660edcba..147c3f5b5e6 100644 --- a/myisam/ftdefs.h +++ b/myisam/ftdefs.h @@ -120,14 +120,14 @@ uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t); byte ft_get_word(byte **, byte *, FT_WORD *, FTB_PARAM *); byte ft_simple_get_word(byte **, byte *, FT_WORD *); -TREE * ft_parse(TREE *, byte *, int); +int ft_parse(TREE *, byte *, int); FT_WORD * ft_linearize(/*MI_INFO *, uint, byte *, */TREE *); FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, byte *, const byte *); const struct _ft_vft _ft_vft_nlq; FT_INFO *ft_init_nlq_search(MI_INFO *, uint, byte *, uint, my_bool); int ft_nlq_read_next(FT_INFO *, char *); -float ft_nlq_find_relevance(FT_INFO *, my_off_t, byte *); +float ft_nlq_find_relevance(FT_INFO *, byte *, uint); void ft_nlq_close_search(FT_INFO *); float ft_nlq_get_relevance(FT_INFO *); my_off_t ft_nlq_get_docid(FT_INFO *); @@ -136,7 +136,7 @@ void ft_nlq_reinit_search(FT_INFO *); const struct _ft_vft _ft_vft_boolean; FT_INFO *ft_init_boolean_search(MI_INFO *, uint, byte *, uint, my_bool); int ft_boolean_read_next(FT_INFO *, char *); -float ft_boolean_find_relevance(FT_INFO *, my_off_t, byte *); +float ft_boolean_find_relevance(FT_INFO *, byte *, uint); void ft_boolean_close_search(FT_INFO *); float ft_boolean_get_relevance(FT_INFO *); my_off_t ft_boolean_get_docid(FT_INFO *); diff --git a/mysql-test/t/fulltext.test b/mysql-test/t/fulltext.test index 81993b167c6..ab3fc194891 100644 --- a/mysql-test/t/fulltext.test +++ b/mysql-test/t/fulltext.test @@ -10,9 +10,15 @@ INSERT INTO t1 VALUES('MySQL has now support', 'for full-text search'), ('Only MyISAM tables','support collections'), ('Function MATCH ... AGAINST()','is used to do a search'), ('Full-text search in MySQL', 'implements vector space model'); + +# nl search + select * from t1 where MATCH(a,b) AGAINST ("collections"); select * from t1 where MATCH(a,b) AGAINST ("indexes"); select * from t1 where MATCH(a,b) AGAINST ("indexes collections"); + +# boolean search + select * from t1 where MATCH(a,b) AGAINST("support -collections" IN BOOLEAN MODE); select * from t1 where MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE); select * from t1 where MATCH(a,b) AGAINST("support +collections" IN BOOLEAN MODE); @@ -22,6 +28,13 @@ select * from t1 where MATCH(a,b) AGAINST("+search" IN BOOLEAN MODE); select * from t1 where MATCH(a,b) AGAINST("+search +(support vector)" IN BOOLEAN MODE); select * from t1 where MATCH(a,b) AGAINST("+search -(support vector)" IN BOOLEAN MODE); select *, MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE) as x from t1; + +# boolean w/o index: + +select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE); + +#update/delete with fulltext index + delete from t1 where a like "MySQL%"; update t1 set a='some test foobar' where MATCH a,b AGAINST ('model'); delete from t1 where MATCH(a,b) AGAINST ("indexes"); diff --git a/sql/item_func.cc b/sql/item_func.cc index 86cc3283955..fe68d8f47c2 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -2004,6 +2004,9 @@ void Item_func_match::init_search(bool no_order) return; } + if (key == NO_SUCH_KEY) + concat=new Item_func_concat_ws (new Item_string(" ",1), fields); + String *ft_tmp=0; char tmp1[FT_QUERY_MAXLEN]; String tmp2(tmp1,sizeof(tmp1)); @@ -2015,7 +2018,8 @@ void Item_func_match::init_search(bool no_order) tmp2.set("",0); } - ft_handler_init(ft_tmp->ptr(), ft_tmp->length(), join_key && !no_order); + ft_handler=table->file->ft_init_ext(mode, key, + ft_tmp->ptr(), ft_tmp->length(), join_key && !no_order); if (join_key) { @@ -2032,12 +2036,11 @@ bool Item_func_match::fix_fields(THD *thd,struct st_table_list *tlist) maybe_null=1; join_key=0; - /* Serg: - I'd rather say now that const_item is assumed in quite a bit of - places, so it would be difficult to remove; If it would ever to be - removed, this should include modifications to find_best and auto_close - as complement to auto_init code above. - */ + /* const_item is assumed in quite a bit of places, so it would be difficult + to remove; If it would ever to be removed, this should include + modifications to find_best and auto_close as complement to auto_init code + above. + */ if (Item_func::fix_fields(thd,tlist) || !const_item()) { my_error(ER_WRONG_ARGUMENTS,MYF(0),"AGAINST"); @@ -2051,21 +2054,20 @@ bool Item_func_match::fix_fields(THD *thd,struct st_table_list *tlist) if (item->type() == Item::REF_ITEM) li.replace(item= *((Item_ref *)item)->ref); if (item->type() != Item::FIELD_ITEM || !item->used_tables()) - { - my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH"); - return 1; - } + key=NO_SUCH_KEY; used_tables_cache|=item->used_tables(); } /* check that all columns come from the same table */ if (count_bits(used_tables_cache) != 1) + key=NO_SUCH_KEY; + const_item_cache=0; + table=((Item_field *)fields.head())->field->table; + record=table->record[0]; + if (key == NO_SUCH_KEY && mode != FT_BOOL) { my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH"); return 1; } - const_item_cache=0; - table=((Item_field *)fields.head())->field->table; - record=table->record[0]; return 0; } @@ -2074,6 +2076,10 @@ bool Item_func_match::fix_index() List_iterator_fast<Item> li(fields); Item_field *item; uint ft_to_key[MAX_KEY], ft_cnt[MAX_KEY], fts=0, key; + uint max_cnt=0, mkeys=0; + + if (this->key == NO_SUCH_KEY) + return 0; for (key=0 ; key<table->keys ; key++) { @@ -2087,11 +2093,7 @@ bool Item_func_match::fix_index() } if (!fts) - { - my_printf_error(ER_FT_MATCHING_KEY_NOT_FOUND, - ER(ER_FT_MATCHING_KEY_NOT_FOUND),MYF(0)); - return 1; - } + goto err; while ((item=(Item_field*)(li++))) { @@ -2108,7 +2110,6 @@ bool Item_func_match::fix_index() } } - uint max_cnt=0, mkeys=0; for (key=0 ; key<fts ; key++) { if (ft_cnt[key] > max_cnt) @@ -2139,6 +2140,12 @@ bool Item_func_match::fix_index() return 0; } +err: + if (mode == FT_BOOL) + { + this->key=NO_SUCH_KEY; + return 0; + } my_printf_error(ER_FT_MATCHING_KEY_NOT_FOUND, ER(ER_FT_MATCHING_KEY_NOT_FOUND),MYF(0)); return 1; @@ -2174,61 +2181,18 @@ double Item_func_match::val() join_key=0; } - my_off_t docid=table->file->row_position(); - - if ((null_value=(docid==HA_OFFSET_ERROR))) - return 0.0; - else - return ft_handler->please->find_relevance(ft_handler, docid, record); -} - -#if 0 -double Item_func_match_nl::val() -{ - if (ft_handler==NULL) - init_search(1); - - if ((null_value= (ft_handler==NULL))) - return 0.0; - - if (join_key) + if (key == NO_SUCH_KEY) { - if (table->file->ft_handler) - return ft_handler->please->get_relevance(ft_handler); - - join_key=0; + String *a=concat->val_str(&value); + if (null_value=(a==0)) + return 0; + return ft_handler->please->find_relevance(ft_handler, + (byte *)a->ptr(), a->length()); } - - my_off_t docid=table->file->row_position(); - - if ((null_value=(docid==HA_OFFSET_ERROR))) - return 0.0; else - return ft_handler->please->find_relevance(ft_handler, docid, record); + return ft_handler->please->find_relevance(ft_handler, record, 0); } -double Item_func_match_bool::val() -{ - if (ft_handler==NULL) - init_search(1); - - if ((null_value= (ft_handler==NULL))) - return 0.0; - - if (join_key) - { - if (table->file->ft_handler) - return ft_handler->please->get_relevance(ft_handler); - - join_key=0; - } - - return ft_handler->please->find_relevance(ft_handler, docid, record); - //null_value=1; - //return -1.0; -} -#endif - /*************************************************************************** System variables This has to be recoded after we get more than 3 system variables diff --git a/sql/item_func.h b/sql/item_func.h index 182daf9f74e..2bf272f24ed 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -862,15 +862,18 @@ class Item_func_match :public Item_real_func { public: List<Item> fields; + Item *concat; + String value; TABLE *table; - uint key; + uint key, mode; bool join_key; Item_func_match *master; FT_INFO * ft_handler; byte *record; Item_func_match(List<Item> &a, Item *b): Item_real_func(b), - fields(a), table(0), join_key(0), master(0), ft_handler(0) {} + fields(a), table(0), join_key(0), master(0), ft_handler(0), + key(0), concat(0) {} ~Item_func_match() { if (!master && ft_handler) @@ -880,8 +883,8 @@ public: if(join_key) table->file->ft_handler=0; } + if (concat) delete concat; } - virtual int ft_handler_init(const byte *key, uint keylen, bool presort) =0; enum Functype functype() const { return FT_FUNC; } void update_used_tables() {} bool fix_fields(THD *thd,struct st_table_list *tlist); @@ -896,26 +899,16 @@ public: class Item_func_match_nl :public Item_func_match { public: - Item_func_match_nl(List<Item> &a, Item *b): Item_func_match(a,b) {} + Item_func_match_nl(List<Item> &a, Item *b): + Item_func_match(a,b) { mode=FT_NL; } const char *func_name() const { return "match_nl"; } -// double val(); - int ft_handler_init(const byte *query, uint querylen, bool presort) - { - ft_handler=table->file->ft_init_ext(FT_NL,key, query, querylen, presort); - return 0; - } }; class Item_func_match_bool :public Item_func_match { public: - Item_func_match_bool(List<Item> &a, Item *b): Item_func_match(a,b) {} + Item_func_match_bool(List<Item> &a, Item *b): + Item_func_match(a,b) { mode=FT_BOOL; } const char *func_name() const { return "match_bool"; } -// double val(); - int ft_handler_init(const byte *query, uint querylen, bool presort) - { - ft_handler=table->file->ft_init_ext(FT_BOOL,key, query, querylen, presort); - return 0; - } }; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index abb1d891166..4890e8fd6c6 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -1457,7 +1457,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array, { Item *item; /* - I, (Sergei) too lazy to implement proper recursive descent here, + I'm (Sergei) too lazy to implement proper recursive descent here, and anyway, nobody will use such a stupid queries that will require it :-) May be later... @@ -1474,7 +1474,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array, } } - if (!cond_func) + if (!cond_func || cond_func->key == NO_SUCH_KEY) return; KEYUSE keyuse; |