diff options
author | unknown <serg@serg.mysql.com> | 2001-04-17 21:30:02 +0200 |
---|---|---|
committer | unknown <serg@serg.mysql.com> | 2001-04-17 21:30:02 +0200 |
commit | 17ef8ef4443598265bca23d86d04fcc275da7804 (patch) | |
tree | 6c294f797e941fcb6156771d2a0f0dd4393d06bb | |
parent | 09f0384e7abce9d9cb744d0a20b897b5a77ef747 (diff) | |
download | mariadb-git-17ef8ef4443598265bca23d86d04fcc275da7804.tar.gz |
ft_* variables added
-rw-r--r-- | Docs/manual.texi | 44 | ||||
-rw-r--r-- | include/ft_global.h | 5 | ||||
-rw-r--r-- | myisam/ft_dump.c | 2 | ||||
-rw-r--r-- | myisam/ft_parser.c | 4 | ||||
-rw-r--r-- | myisam/ft_static.c | 4 | ||||
-rw-r--r-- | myisam/ft_stopwords.c | 2 | ||||
-rw-r--r-- | myisam/ftdefs.h | 4 | ||||
-rw-r--r-- | myisam/fulltext.h | 1 | ||||
-rw-r--r-- | myisam/mi_check.c | 4 | ||||
-rw-r--r-- | myisam/myisamchk.c | 6 | ||||
-rw-r--r-- | mysql-test/r/fulltext_var.result | 4 | ||||
-rw-r--r-- | mysql-test/t/fulltext_var.test | 5 | ||||
-rw-r--r-- | sql/mysqld.cc | 9 |
13 files changed, 77 insertions, 17 deletions
diff --git a/Docs/manual.texi b/Docs/manual.texi index 62b9bdb2134..af5ab163e6a 100644 --- a/Docs/manual.texi +++ b/Docs/manual.texi @@ -21342,6 +21342,9 @@ differ somewhat: | delayed_queue_size | 1000 | | flush | OFF | | flush_time | 0 | +| ft_min_word_len | 4 | +| ft_max_word_len | 254 | +| ft_max_word_len_for_sort| 20 | | have_bdb | YES | | have_gemini | NO | | have_innodb | YES | @@ -21525,6 +21528,31 @@ tables will be closed (to free up resources and sync things to disk). We only recommend this option on Win95, Win98, or on systems where you have very little resources. +@item @code{ft_min_word_len} +The minimum length of the word to be included in a @code{FULLTEXT} index. +@strong{Note: @code{FULLTEXT} indexes have to be rebuilt after changing +this variable.} + +@item @code{ft_max_word_len} +The maximum length of the word to be included in a @code{FULLTEXT} index. +@strong{Note: @code{FULLTEXT} indexes have to be rebuilt after changing +this variable.} + +@item @code{ft_max_word_len_for_sort} +The maximum length of the word in a @code{FULLTEXT} index +to be used in fast index recreation method in +@code{REPAIR}, @code{CREATE INDEX}, or +@code{ALTER TABLE}. Longer words are inserted the slow way. +The rule of thumb is as follows: with @code{ft_max_word_len_for_sort} +increasing, @strong{MySQL} will create bigger temporary files +(thus slowing the process down, due to disk I/O), and will put +fewer keys in one sort block (again, decreasing the efficiency). +When @code{ft_max_word_len_for_sort} is too small, instead, +@strong{MySQL} will insert a lot of words into the index the slow way - +but short words will be inserted very fast. It applies only to +index recreation during @code{REPAIR}, @code{CREATE INDEX}, or +@code{ALTER TABLE}. + @item @code{have_bdb} @code{YES} if @code{mysqld} supports Berkeley DB tables. @code{DISABLED} if @code{--skip-bdb} is used. @@ -28279,12 +28307,9 @@ unless you know what you are doing! 
@itemize @item -Minimal length of word to be indexed is defined in -@code{myisam/ftdefs.h} file by the line -@example -#define MIN_WORD_LEN 4 -@end example -Change it to the value you prefer, recompile @strong{MySQL}, and rebuild +Minimal length of word to be indexed is defined by @strong{MySQL} +variable @code{ft_min_word_len}. @xref{SHOW VARIABLES}. +Change it to the value you prefer, and rebuild your @code{FULLTEXT} indexes. @item @@ -42463,6 +42488,8 @@ Responsible for @strong{MySQL} configure. Full-text search. @item Added keys to the @code{MERGE} library. +@item +@code{HANDLER} command. @end itemize @item Jeremy Cole @@ -42801,6 +42828,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}. @itemize @bullet @item +Added @code{HANDLER} command. +@item Added @code{SQL_CALC_FOUND_ROWS} and @code{FOUND_ROWS()}. This make it possible to know how many rows a query would have returned if one hadn't used @code{LIMIT}. @@ -42903,6 +42932,9 @@ not yet 100% confident in this code. @appendixsubsec Changes in release 3.23.37 @itemize @bullet @item +Added variables @code{ft_min_word_len}, @code{ft_max_word_len}, and +@code{ft_max_word_len_for_sort}. +@item Changed @code{INNOBASE} to @code{INNODB} (because the @code{INNOBASE} name was already used). 
Note that all @code{configure} options and @code{mysqld} start options are now using @code{innodb} instead of diff --git a/include/ft_global.h b/include/ft_global.h index bca13271f34..415c0884989 100644 --- a/include/ft_global.h +++ b/include/ft_global.h @@ -27,6 +27,7 @@ extern "C" { #endif #define FT_QUERY_MAXLEN 1024 +#define HA_FT_MAXLEN 254 typedef struct ft_doc_rec { my_off_t dpos; @@ -42,6 +43,10 @@ typedef struct st_ft_doclist { extern const char *ft_precompiled_stopwords[]; +extern uint ft_min_word_len; +extern uint ft_max_word_len; +extern uint ft_max_word_len_for_sort; + int ft_init_stopwords(const char **); void ft_free_stopwords(void); diff --git a/myisam/ft_dump.c b/myisam/ft_dump.c index af49d834d0f..62e2a67dfc2 100644 --- a/myisam/ft_dump.c +++ b/myisam/ft_dump.c @@ -25,7 +25,7 @@ static void complain(int val); static int count=0, stats=0, dump=0, verbose=0; static char *query=NULL; -#define MAX (MAX_WORD_LEN+10) +#define MAX (HA_FT_MAXLEN+10) #define HOW_OFTEN_TO_WRITE 1000 int main(int argc,char *argv[]) diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c index 09c93d7dc5b..d156c8c5556 100644 --- a/myisam/ft_parser.c +++ b/myisam/ft_parser.c @@ -164,7 +164,7 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) if ((param->trunc=(doc<end && *doc == FTB_TRUNC))) doc++; - if (word->len >= MIN_WORD_LEN && word->len < MAX_WORD_LEN && + if (word->len >= ft_min_word_len && word->len < ft_max_word_len && !is_stopword(word->pos, word->len)) { *start=doc; @@ -195,7 +195,7 @@ byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word) word->len= (uint)(doc-word->pos) - mwc; - if (word->len >= MIN_WORD_LEN && word->len < MAX_WORD_LEN && + if (word->len >= ft_min_word_len && word->len < ft_max_word_len && !is_stopword(word->pos, word->len)) { *start=doc; diff --git a/myisam/ft_static.c b/myisam/ft_static.c index 00d9d4ed19a..09afadec23f 100644 --- a/myisam/ft_static.c +++ b/myisam/ft_static.c @@ -18,6 +18,10 @@ #include "ftdefs.h" 
+uint ft_min_word_len=4; +uint ft_max_word_len=HA_FT_MAXLEN; +uint ft_max_word_len_for_sort=20; + const MI_KEYSEG ft_keysegs[FT_SEGS]={ { HA_KEYTYPE_VARTEXT, /* type */ diff --git a/myisam/ft_stopwords.c b/myisam/ft_stopwords.c index d796b87ed71..556f52650c8 100644 --- a/myisam/ft_stopwords.c +++ b/myisam/ft_stopwords.c @@ -48,7 +48,7 @@ int ft_init_stopwords(const char **sws) for(;*sws;sws++) { - if( (sw.len= (uint) strlen(sw.pos=*sws)) < MIN_WORD_LEN) continue; + if( (sw.len= (uint) strlen(sw.pos=*sws)) < ft_min_word_len) continue; if(!tree_insert(stopwords3, &sw, 0)) { delete_tree(stopwords3); /* purecov: inspected */ diff --git a/myisam/ftdefs.h b/myisam/ftdefs.h index b0aeb652b36..6721d136678 100644 --- a/myisam/ftdefs.h +++ b/myisam/ftdefs.h @@ -22,10 +22,6 @@ #include <m_ctype.h> #include <my_tree.h> -#define MIN_WORD_LEN 4 -#define MAX_WORD_LEN HA_FT_MAXLEN -#define MAX_WORD_LEN_FOR_SORT 20 - #define HYPHEN_IS_DELIM #define HYPHEN_IS_CONCAT /* not used for now */ diff --git a/myisam/fulltext.h b/myisam/fulltext.h index 8fcac8172b1..f787c9bcfe8 100644 --- a/myisam/fulltext.h +++ b/myisam/fulltext.h @@ -24,7 +24,6 @@ /* shoudn't be def'ed when linking with mysql */ #undef EVAL_RUN -#define HA_FT_MAXLEN 254 #define HA_FT_WTYPE HA_KEYTYPE_FLOAT #define HA_FT_WLEN 4 #ifdef EVAL_RUN diff --git a/myisam/mi_check.c b/myisam/mi_check.c index d588da23d84..b19a3ffdfbc 100644 --- a/myisam/mi_check.c +++ b/myisam/mi_check.c @@ -1875,10 +1875,10 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, if (sort_info->keyinfo->flag & HA_FULLTEXT) { sort_param.max_records=sort_info->max_records= - (ha_rows) (sort_info->filelength/MAX_WORD_LEN_FOR_SORT+1); + (ha_rows) (sort_info->filelength/ft_max_word_len_for_sort+1); sort_param.key_read=sort_ft_key_read; - sort_param.key_length+=MAX_WORD_LEN_FOR_SORT-MAX_WORD_LEN; + sort_param.key_length+=ft_max_word_len_for_sort-ft_max_word_len; } else sort_param.key_read=sort_key_read; diff --git a/myisam/myisamchk.c 
b/myisam/myisamchk.c index cbdd6a26767..cff5f781538 100644 --- a/myisam/myisamchk.c +++ b/myisam/myisamchk.c @@ -152,6 +152,12 @@ static CHANGEABLE_VAR changeable_vars[] = { { "sort_key_blocks",(long*) &check_param.sort_key_blocks,BUFFERS_WHEN_SORTING,4L,100L,0L, 1L }, { "decode_bits",(long*) &decode_bits,9L,4L,17L,0L,1L }, + { "ft_min_word_len", (long*) &ft_min_word_len, + 4, 1, HA_FT_MAXLEN, 0, 1 }, + { "ft_max_word_len", (long*) &ft_max_word_len, + HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1 }, + { "ft_max_word_len_for_sort",(long*) &ft_max_word_len_for_sort, + 20, 4, HA_FT_MAXLEN, 0, 1 }, { NullS,(long*) 0,0L,0L,0L,0L,0L,} }; enum options {OPT_CHARSETS_DIR=256, OPT_SET_CHARSET,OPT_START_CHECK_POS}; diff --git a/mysql-test/r/fulltext_var.result b/mysql-test/r/fulltext_var.result new file mode 100644 index 00000000000..0352cd7addf --- /dev/null +++ b/mysql-test/r/fulltext_var.result @@ -0,0 +1,4 @@ +Variable_name Value +ft_min_word_len 4 +ft_max_word_len 254 +ft_max_word_len_for_sort 20 diff --git a/mysql-test/t/fulltext_var.test b/mysql-test/t/fulltext_var.test new file mode 100644 index 00000000000..71213d1195a --- /dev/null +++ b/mysql-test/t/fulltext_var.test @@ -0,0 +1,5 @@ +# +# Fulltext configurable parameters +# + +show variables like "ft\_%"; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 3bdd1ff6826..bbda3eba1bb 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -2664,6 +2664,12 @@ CHANGEABLE_VAR changeable_vars[] = { DELAYED_QUEUE_SIZE, 1, ~0L, 0, 1 }, { "flush_time", (long*) &flush_time, FLUSH_TIME, 0, ~0L, 0, 1 }, + { "ft_min_word_len", (long*) &ft_min_word_len, + 4, 1, HA_FT_MAXLEN, 0, 1 }, + { "ft_max_word_len", (long*) &ft_max_word_len, + HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1 }, + { "ft_max_word_len_for_sort",(long*) &ft_max_word_len_for_sort, + 20, 4, HA_FT_MAXLEN, 0, 1 }, #ifdef HAVE_GEMINI_DB { "gemini_buffer_cache", (long*) &gemini_buffer_cache, 128 * 8192, 16, LONG_MAX, 0, 1 }, @@ -2804,6 +2810,9 @@ struct show_var_st init_vars[]= { 
{"delayed_queue_size", (char*) &delayed_queue_size, SHOW_LONG}, {"flush", (char*) &myisam_flush, SHOW_MY_BOOL}, {"flush_time", (char*) &flush_time, SHOW_LONG}, + {"ft_min_word_len", (char*) &ft_min_word_len, SHOW_LONG}, + {"ft_max_word_len", (char*) &ft_max_word_len, SHOW_LONG}, + {"ft_max_word_len_for_sort",(char*) &ft_max_word_len_for_sort, SHOW_LONG}, #ifdef HAVE_GEMINI_DB {"gemini_buffer_cache", (char*) &gemini_buffer_cache, SHOW_LONG}, {"gemini_connection_limit", (char*) &gemini_connection_limit, SHOW_LONG}, |