summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Docs/manual.texi 16
-rw-r--r-- include/ft_global.h 2
-rw-r--r-- include/my_base.h 1
-rw-r--r-- myisam/ft_boolean_search.c 22
-rw-r--r-- myisam/ft_dump.c 12
-rw-r--r-- myisam/ft_nlq_search.c 12
-rw-r--r-- myisam/ft_parser.c 6
-rw-r--r-- myisam/ft_update.c 6
-rw-r--r-- myisam/ftdefs.h 6
-rw-r--r-- mysql-test/t/fulltext.test 13
-rw-r--r-- sql/item_func.cc 104
-rw-r--r-- sql/item_func.h 27
-rw-r--r-- sql/sql_select.cc 4
13 files changed, 113 insertions, 118 deletions
diff --git a/Docs/manual.texi b/Docs/manual.texi
index 73e1449fcb4..51f1b7b66d8 100644
--- a/Docs/manual.texi
+++ b/Docs/manual.texi
@@ -29167,8 +29167,6 @@ mysql> select STRCMP('text', 'text');
relevance - similarity measure between the text in columns
@code{(col1,col2,...)} and the query @code{expr}. Relevance is a
positive floating-point number. Zero relevance means no similarity.
-For @code{MATCH ... AGAINST()} to work, a @strong{FULLTEXT} index
-must be created first. @xref{CREATE TABLE, , @code{CREATE TABLE}}.
@code{MATCH ... AGAINST()} is available in MySQL version
3.23.23 or later. @code{IN BOOLEAN MODE} extension was added in version
4.0.1. For details and usage examples @pxref{Fulltext Search}.
@@ -33828,9 +33826,10 @@ mysql> SELECT * FROM articles WHERE MATCH (title,body) AGAINST (
This query retrieved all the rows that contain the word @code{MySQL}
(note: 50% threshold is gone), but does @strong{not} contain the word
-@code{YourSQL}. Note that it does not auto-magically sort rows in
+@code{YourSQL}. Note, that it does not auto-magically sort rows in
decreasing relevance order (the last row has the highest relevance,
-as it contains @code{MySQL} twice).
+as it contains @code{MySQL} twice). Boolean fulltext search can also
+work without a @code{FULLTEXT} index, but it will be @strong{slow}.
Boolean fulltext search supports the following operators:
@@ -33890,10 +33889,12 @@ order), but rank ``gates to hell'' higher than ``bill gates''.
@itemize @bullet
@item
All parameters to the @code{MATCH} function must be columns from the
-same table that is part of the same fulltext index.
+same table that is part of the same fulltext index, unless this
+@code{MATCH} is @code{IN BOOLEAN MODE}.
@item
Column list between @code{MATCH} and @code{AGAINST} must match exactly
-a column list in the @code{FULLTEXT} index definition.
+a column list in the @code{FULLTEXT} index definition, unless this
+@code{MATCH} is @code{IN BOOLEAN MODE}.
@item
The argument to @code{AGAINST} must be a constant string.
@end itemize
@@ -45853,6 +45854,9 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
@itemize @bullet
@item
+@code{MATCH ... AGAINST(... IN BOOLEAN MODE)} can now work
+without a @code{FULLTEXT} index.
+@item
Added @file{myisam/ft_dump} utility for low-level inspection
of @code{FULLTEXT} indexes.
@item
diff --git a/include/ft_global.h b/include/ft_global.h
index 52fb8d38a9a..064dd7a6538 100644
--- a/include/ft_global.h
+++ b/include/ft_global.h
@@ -32,7 +32,7 @@ extern "C" {
typedef struct st_ft_info FT_INFO;
struct _ft_vft {
int (*read_next)(FT_INFO *, char *);
- float (*find_relevance)(FT_INFO *, my_off_t, byte *);
+ float (*find_relevance)(FT_INFO *, byte *, uint);
void (*close_search)(FT_INFO *);
float (*get_relevance)(FT_INFO *);
void (*reinit_search)(FT_INFO *);
diff --git a/include/my_base.h b/include/my_base.h
index adb2366f454..abd2ac602e5 100644
--- a/include/my_base.h
+++ b/include/my_base.h
@@ -226,6 +226,7 @@ enum ha_base_keytype {
/* Other constants */
#define HA_NAMELEN 64 /* Max length of saved filename */
+#define NO_SUCH_KEY ((uint)~0) /* used as a key no. */
/* Intern constants in databases */
diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c
index 70ba7fc9df2..0055842c24b 100644
--- a/myisam/ft_boolean_search.c
+++ b/myisam/ft_boolean_search.c
@@ -152,13 +152,16 @@ void _ftb_init_index_search(FT_INFO *ftb)
int i, r;
FTB_WORD *ftbw;
MI_INFO *info=ftb->info;
- MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr;
- my_off_t keyroot=info->s->state.key_root[ftb->keynr];
+ MI_KEYDEF *keyinfo;
+ my_off_t keyroot;
- if (ftb->state != READY)
+ if (ftb->state != READY || ftb->keynr == NO_SUCH_KEY)
return;
ftb->state=INDEX_SEARCH;
+ keyinfo=info->s->keyinfo+ftb->keynr;
+ keyroot=info->s->state.key_root[ftb->keynr];
+
for (i=ftb->queue.elements; i; i--)
{
ftbw=(FTB_WORD *)(ftb->queue.root[i]);
@@ -352,14 +355,17 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
return my_errno=HA_ERR_END_OF_FILE;
}
-float ft_boolean_find_relevance(FT_INFO *ftb, my_off_t docid, byte *record)
+float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
{
TREE ptree;
FT_WORD word;
FTB_WORD *ftbw;
FTB_EXPR *ftbe;
uint i;
+ my_off_t docid=ftb->info->lastpos;
+ if (docid == HA_POS_ERROR)
+ return -2.0;
if (ftb->state == READY || ftb->state == INDEX_DONE)
{
for (i=1; i<=ftb->queue.elements; i++)
@@ -382,11 +388,13 @@ float ft_boolean_find_relevance(FT_INFO *ftb, my_off_t docid, byte *record)
ftb->state=SCAN;
}
else if (ftb->state != SCAN)
- return -2.0;
+ return -3.0;
bzero(&ptree, sizeof(ptree));
- if (_mi_ft_parse(& ptree, ftb->info, ftb->keynr, record))
- return -3.0;
+ if ((ftb->keynr==NO_SUCH_KEY)
+ ? ft_parse(& ptree, record, length)
+ : _mi_ft_parse(& ptree, ftb->info, ftb->keynr, record))
+ return -4.0;
for (i=1; i<=ftb->queue.elements; i++)
{
diff --git a/myisam/ft_dump.c b/myisam/ft_dump.c
index 2c85669ff0e..940164f89c5 100644
--- a/myisam/ft_dump.c
+++ b/myisam/ft_dump.c
@@ -159,7 +159,7 @@ err:
return 0;
}
-const char *options="dscve:h";
+const char *options="dscvh";
static void get_options(int argc, char *argv[])
{
@@ -184,7 +184,15 @@ static void get_options(int argc, char *argv[])
static void usage(char *argv[])
{
- printf("Use: %s [-%s] <table_name> <key_no>\n", *argv, options);
+ printf("
+Use: %s [-%s] <table_name> <index_no>
+
+-d dump index (incl. data offsets and word weights)
+-s report global stats
+-c calculate per-word stats (counts and global weights)
+-v be verbose
+-h this text\n
+", *argv, options);
exit(1);
}
diff --git a/myisam/ft_nlq_search.c b/myisam/ft_nlq_search.c
index 2a55ff839ca..5bb2ffab939 100644
--- a/myisam/ft_nlq_search.c
+++ b/myisam/ft_nlq_search.c
@@ -169,7 +169,7 @@ static int FT_DOC_cmp(FT_DOC *a, FT_DOC *b)
FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
uint query_len, my_bool presort)
{
- TREE *wtree, allocated_wtree;
+ TREE allocated_wtree, *wtree=&allocated_wtree;
ALL_IN_ONE aio;
FT_DOC *dptr;
FT_INFO *dlist=NULL;
@@ -193,7 +193,7 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0,
NULL, NULL);
- if(!(wtree=ft_parse(&allocated_wtree,query,query_len)))
+ if(ft_parse(&allocated_wtree,query,query_len))
goto err;
if(tree_walk(wtree, (tree_walk_action)&walk_and_match, &aio,
@@ -247,11 +247,15 @@ int ft_nlq_read_next(FT_INFO *handler, char *record)
return my_errno;
}
-float ft_nlq_find_relevance(FT_INFO *handler, my_off_t docid,
- byte *record __attribute__((unused)))
+float ft_nlq_find_relevance(FT_INFO *handler,
+ byte *record __attribute__((unused)), uint length __attribute__((unused)))
{
int a,b,c;
FT_DOC *docs=handler->doc;
+ my_off_t docid=handler->info->lastpos;
+
+ if (docid == HA_POS_ERROR)
+ return -5.0;
/* Assuming docs[] is sorted by dpos... */
diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c
index 35e5959b556..0d1495da548 100644
--- a/myisam/ft_parser.c
+++ b/myisam/ft_parser.c
@@ -206,7 +206,7 @@ byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word)
return 0;
}
-TREE * ft_parse(TREE *wtree, byte *doc, int doclen)
+int ft_parse(TREE *wtree, byte *doc, int doclen)
{
byte *end=doc+doclen;
FT_WORD w;
@@ -221,10 +221,10 @@ TREE * ft_parse(TREE *wtree, byte *doc, int doclen)
if (!tree_insert(wtree, &w, 0))
goto err;
}
- return wtree;
+ return 0;
err:
delete_tree(wtree);
- return NULL;
+ return 1;
}
diff --git a/myisam/ft_update.c b/myisam/ft_update.c
index 89da06e673f..1bc0ace6c77 100644
--- a/myisam/ft_update.c
+++ b/myisam/ft_update.c
@@ -28,7 +28,7 @@
/**************************************************************/
-/* parses a document i.e. calls _mi_ft_parse for every keyseg */
+/* parses a document i.e. calls ft_parse for every keyseg */
uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record)
{
byte *pos;
@@ -57,11 +57,11 @@ uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record)
}
else
len=keyseg->length;
- if (!(ft_parse(parsed, pos, len)))
+ if (ft_parse(parsed, pos, len))
return 1;
}
/* Handle the case where all columns are NULL */
- if (!is_tree_inited(parsed) && !(ft_parse(parsed, (byte*) "", 0)))
+ if (!is_tree_inited(parsed) && ft_parse(parsed, (byte*) "", 0))
return 1;
else
return 0;
diff --git a/myisam/ftdefs.h b/myisam/ftdefs.h
index fd3660edcba..147c3f5b5e6 100644
--- a/myisam/ftdefs.h
+++ b/myisam/ftdefs.h
@@ -120,14 +120,14 @@ uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t);
byte ft_get_word(byte **, byte *, FT_WORD *, FTB_PARAM *);
byte ft_simple_get_word(byte **, byte *, FT_WORD *);
-TREE * ft_parse(TREE *, byte *, int);
+int ft_parse(TREE *, byte *, int);
FT_WORD * ft_linearize(/*MI_INFO *, uint, byte *, */TREE *);
FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, byte *, const byte *);
const struct _ft_vft _ft_vft_nlq;
FT_INFO *ft_init_nlq_search(MI_INFO *, uint, byte *, uint, my_bool);
int ft_nlq_read_next(FT_INFO *, char *);
-float ft_nlq_find_relevance(FT_INFO *, my_off_t, byte *);
+float ft_nlq_find_relevance(FT_INFO *, byte *, uint);
void ft_nlq_close_search(FT_INFO *);
float ft_nlq_get_relevance(FT_INFO *);
my_off_t ft_nlq_get_docid(FT_INFO *);
@@ -136,7 +136,7 @@ void ft_nlq_reinit_search(FT_INFO *);
const struct _ft_vft _ft_vft_boolean;
FT_INFO *ft_init_boolean_search(MI_INFO *, uint, byte *, uint, my_bool);
int ft_boolean_read_next(FT_INFO *, char *);
-float ft_boolean_find_relevance(FT_INFO *, my_off_t, byte *);
+float ft_boolean_find_relevance(FT_INFO *, byte *, uint);
void ft_boolean_close_search(FT_INFO *);
float ft_boolean_get_relevance(FT_INFO *);
my_off_t ft_boolean_get_docid(FT_INFO *);
diff --git a/mysql-test/t/fulltext.test b/mysql-test/t/fulltext.test
index 81993b167c6..ab3fc194891 100644
--- a/mysql-test/t/fulltext.test
+++ b/mysql-test/t/fulltext.test
@@ -10,9 +10,15 @@ INSERT INTO t1 VALUES('MySQL has now support', 'for full-text search'),
('Only MyISAM tables','support collections'),
('Function MATCH ... AGAINST()','is used to do a search'),
('Full-text search in MySQL', 'implements vector space model');
+
+# nl search
+
select * from t1 where MATCH(a,b) AGAINST ("collections");
select * from t1 where MATCH(a,b) AGAINST ("indexes");
select * from t1 where MATCH(a,b) AGAINST ("indexes collections");
+
+# boolean search
+
select * from t1 where MATCH(a,b) AGAINST("support -collections" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("support +collections" IN BOOLEAN MODE);
@@ -22,6 +28,13 @@ select * from t1 where MATCH(a,b) AGAINST("+search" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("+search +(support vector)" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("+search -(support vector)" IN BOOLEAN MODE);
select *, MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE) as x from t1;
+
+# boolean w/o index:
+
+select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE);
+
+#update/delete with fulltext index
+
delete from t1 where a like "MySQL%";
update t1 set a='some test foobar' where MATCH a,b AGAINST ('model');
delete from t1 where MATCH(a,b) AGAINST ("indexes");
diff --git a/sql/item_func.cc b/sql/item_func.cc
index 86cc3283955..fe68d8f47c2 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -2004,6 +2004,9 @@ void Item_func_match::init_search(bool no_order)
return;
}
+ if (key == NO_SUCH_KEY)
+ concat=new Item_func_concat_ws (new Item_string(" ",1), fields);
+
String *ft_tmp=0;
char tmp1[FT_QUERY_MAXLEN];
String tmp2(tmp1,sizeof(tmp1));
@@ -2015,7 +2018,8 @@ void Item_func_match::init_search(bool no_order)
tmp2.set("",0);
}
- ft_handler_init(ft_tmp->ptr(), ft_tmp->length(), join_key && !no_order);
+ ft_handler=table->file->ft_init_ext(mode, key,
+ ft_tmp->ptr(), ft_tmp->length(), join_key && !no_order);
if (join_key)
{
@@ -2032,12 +2036,11 @@ bool Item_func_match::fix_fields(THD *thd,struct st_table_list *tlist)
maybe_null=1;
join_key=0;
- /* Serg:
- I'd rather say now that const_item is assumed in quite a bit of
- places, so it would be difficult to remove; If it would ever to be
- removed, this should include modifications to find_best and auto_close
- as complement to auto_init code above.
- */
+ /* const_item is assumed in quite a few places, so it would be difficult
+ to remove. If it is ever removed, the change should also include
+ modifications to find_best and auto_close, complementing the auto_init
+ code above.
+ */
if (Item_func::fix_fields(thd,tlist) || !const_item())
{
my_error(ER_WRONG_ARGUMENTS,MYF(0),"AGAINST");
@@ -2051,21 +2054,20 @@ bool Item_func_match::fix_fields(THD *thd,struct st_table_list *tlist)
if (item->type() == Item::REF_ITEM)
li.replace(item= *((Item_ref *)item)->ref);
if (item->type() != Item::FIELD_ITEM || !item->used_tables())
- {
- my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH");
- return 1;
- }
+ key=NO_SUCH_KEY;
used_tables_cache|=item->used_tables();
}
/* check that all columns come from the same table */
if (count_bits(used_tables_cache) != 1)
+ key=NO_SUCH_KEY;
+ const_item_cache=0;
+ table=((Item_field *)fields.head())->field->table;
+ record=table->record[0];
+ if (key == NO_SUCH_KEY && mode != FT_BOOL)
{
my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH");
return 1;
}
- const_item_cache=0;
- table=((Item_field *)fields.head())->field->table;
- record=table->record[0];
return 0;
}
@@ -2074,6 +2076,10 @@ bool Item_func_match::fix_index()
List_iterator_fast<Item> li(fields);
Item_field *item;
uint ft_to_key[MAX_KEY], ft_cnt[MAX_KEY], fts=0, key;
+ uint max_cnt=0, mkeys=0;
+
+ if (this->key == NO_SUCH_KEY)
+ return 0;
for (key=0 ; key<table->keys ; key++)
{
@@ -2087,11 +2093,7 @@ bool Item_func_match::fix_index()
}
if (!fts)
- {
- my_printf_error(ER_FT_MATCHING_KEY_NOT_FOUND,
- ER(ER_FT_MATCHING_KEY_NOT_FOUND),MYF(0));
- return 1;
- }
+ goto err;
while ((item=(Item_field*)(li++)))
{
@@ -2108,7 +2110,6 @@ bool Item_func_match::fix_index()
}
}
- uint max_cnt=0, mkeys=0;
for (key=0 ; key<fts ; key++)
{
if (ft_cnt[key] > max_cnt)
@@ -2139,6 +2140,12 @@ bool Item_func_match::fix_index()
return 0;
}
+err:
+ if (mode == FT_BOOL)
+ {
+ this->key=NO_SUCH_KEY;
+ return 0;
+ }
my_printf_error(ER_FT_MATCHING_KEY_NOT_FOUND,
ER(ER_FT_MATCHING_KEY_NOT_FOUND),MYF(0));
return 1;
@@ -2174,61 +2181,18 @@ double Item_func_match::val()
join_key=0;
}
- my_off_t docid=table->file->row_position();
-
- if ((null_value=(docid==HA_OFFSET_ERROR)))
- return 0.0;
- else
- return ft_handler->please->find_relevance(ft_handler, docid, record);
-}
-
-#if 0
-double Item_func_match_nl::val()
-{
- if (ft_handler==NULL)
- init_search(1);
-
- if ((null_value= (ft_handler==NULL)))
- return 0.0;
-
- if (join_key)
+ if (key == NO_SUCH_KEY)
{
- if (table->file->ft_handler)
- return ft_handler->please->get_relevance(ft_handler);
-
- join_key=0;
+ String *a=concat->val_str(&value);
+ if (null_value=(a==0))
+ return 0;
+ return ft_handler->please->find_relevance(ft_handler,
+ (byte *)a->ptr(), a->length());
}
-
- my_off_t docid=table->file->row_position();
-
- if ((null_value=(docid==HA_OFFSET_ERROR)))
- return 0.0;
else
- return ft_handler->please->find_relevance(ft_handler, docid, record);
+ return ft_handler->please->find_relevance(ft_handler, record, 0);
}
-double Item_func_match_bool::val()
-{
- if (ft_handler==NULL)
- init_search(1);
-
- if ((null_value= (ft_handler==NULL)))
- return 0.0;
-
- if (join_key)
- {
- if (table->file->ft_handler)
- return ft_handler->please->get_relevance(ft_handler);
-
- join_key=0;
- }
-
- return ft_handler->please->find_relevance(ft_handler, docid, record);
- //null_value=1;
- //return -1.0;
-}
-#endif
-
/***************************************************************************
System variables
This has to be recoded after we get more than 3 system variables
diff --git a/sql/item_func.h b/sql/item_func.h
index 182daf9f74e..2bf272f24ed 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -862,15 +862,18 @@ class Item_func_match :public Item_real_func
{
public:
List<Item> fields;
+ Item *concat;
+ String value;
TABLE *table;
- uint key;
+ uint key, mode;
bool join_key;
Item_func_match *master;
FT_INFO * ft_handler;
byte *record;
Item_func_match(List<Item> &a, Item *b): Item_real_func(b),
- fields(a), table(0), join_key(0), master(0), ft_handler(0) {}
+ fields(a), table(0), join_key(0), master(0), ft_handler(0),
+ key(0), concat(0) {}
~Item_func_match()
{
if (!master && ft_handler)
@@ -880,8 +883,8 @@ public:
if(join_key)
table->file->ft_handler=0;
}
+ if (concat) delete concat;
}
- virtual int ft_handler_init(const byte *key, uint keylen, bool presort) =0;
enum Functype functype() const { return FT_FUNC; }
void update_used_tables() {}
bool fix_fields(THD *thd,struct st_table_list *tlist);
@@ -896,26 +899,16 @@ public:
class Item_func_match_nl :public Item_func_match
{
public:
- Item_func_match_nl(List<Item> &a, Item *b): Item_func_match(a,b) {}
+ Item_func_match_nl(List<Item> &a, Item *b):
+ Item_func_match(a,b) { mode=FT_NL; }
const char *func_name() const { return "match_nl"; }
-// double val();
- int ft_handler_init(const byte *query, uint querylen, bool presort)
- {
- ft_handler=table->file->ft_init_ext(FT_NL,key, query, querylen, presort);
- return 0;
- }
};
class Item_func_match_bool :public Item_func_match
{
public:
- Item_func_match_bool(List<Item> &a, Item *b): Item_func_match(a,b) {}
+ Item_func_match_bool(List<Item> &a, Item *b):
+ Item_func_match(a,b) { mode=FT_BOOL; }
const char *func_name() const { return "match_bool"; }
-// double val();
- int ft_handler_init(const byte *query, uint querylen, bool presort)
- {
- ft_handler=table->file->ft_init_ext(FT_BOOL,key, query, querylen, presort);
- return 0;
- }
};
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index abb1d891166..4890e8fd6c6 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1457,7 +1457,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
{
Item *item;
/*
- I, (Sergei) too lazy to implement proper recursive descent here,
+ I'm (Sergei) too lazy to implement proper recursive descent here,
and anyway, nobody will use such stupid queries
that would require it :-)
Maybe later...
@@ -1474,7 +1474,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
}
}
- if (!cond_func)
+ if (!cond_func || cond_func->key == NO_SUCH_KEY)
return;
KEYUSE keyuse;