ft_* variables added

author: unknown <serg@serg.mysql.com> 2001-04-17 21:30:02 +0200
committer: unknown <serg@serg.mysql.com> 2001-04-17 21:30:02 +0200
commit: 17ef8ef4443598265bca23d86d04fcc275da7804 (patch)
tree: 6c294f797e941fcb6156771d2a0f0dd4393d06bb
parent: 09f0384e7abce9d9cb744d0a20b897b5a77ef747 (diff)
download: mariadb-git-17ef8ef4443598265bca23d86d04fcc275da7804.tar.gz
13 files changed, 77 insertions, 17 deletions
diff --git a/Docs/manual.texi b/Docs/manual.texi
index 62b9bdb2134..af5ab163e6a 100644
--- a/Docs/manual.texi
+++ b/Docs/manual.texi
@@ -21342,6 +21342,9 @@ differ somewhat:
 | delayed_queue_size      | 1000                      |
 | flush                   | OFF                       |
 | flush_time              | 0                         |
+| ft_min_word_len         | 4                         |
+| ft_max_word_len         | 254                       |
+| ft_max_word_len_for_sort| 20                        |
 | have_bdb                | YES                       |
 | have_gemini             | NO                        |
 | have_innodb             | YES                       |
@@ -21525,6 +21528,31 @@ tables will be closed (to free up resources and sync things to disk). We
 only recommend this option on Win95, Win98, or on systems where you have
 very little resources.
 
+@item @code{ft_min_word_len}
+The minimum length of the word to be included in a @code{FULLTEXT} index.
+@strong{Note: @code{FULLTEXT} indexes have to be rebuilt after changing
+this variable.}
+
+@item @code{ft_max_word_len}
+The maximum length of the word to be included in a @code{FULLTEXT} index.
+@strong{Note: @code{FULLTEXT} indexes have to be rebuilt after changing
+this variable.}
+
+@item @code{ft_max_word_len_for_sort}
+The maximum length of the word in a @code{FULLTEXT} index
+to be used in the fast index recreation method in
+@code{REPAIR}, @code{CREATE INDEX},  or
+@code{ALTER TABLE}.  Longer words are inserted the slow way.
+The rule of thumb is as follows: with @code{ft_max_word_len_for_sort}
+increasing, @strong{MySQL} will create bigger temporary files
+(thus slowing the process down, due to disk I/O), and will put
+fewer keys in one sort block (again, decreasing the efficiency).
+When @code{ft_max_word_len_for_sort} is too small, instead,
+@strong{MySQL} will insert a lot of words into the index the slow way -
+but short words will be inserted very fast. It applies only to
+index recreation during @code{REPAIR}, @code{CREATE INDEX}, or
+@code{ALTER TABLE}.
+
 @item @code{have_bdb}
 @code{YES} if @code{mysqld} supports Berkeley DB tables. @code{DISABLED}
 if @code{--skip-bdb} is used.
@@ -28279,12 +28307,9 @@ unless you know what you are doing!
 @itemize
 
 @item
-Minimal length of word to be indexed is defined in
-@code{myisam/ftdefs.h} file by the line
-@example
-#define MIN_WORD_LEN 4
-@end example
-Change it to the value you prefer, recompile @strong{MySQL}, and rebuild
+Minimal length of word to be indexed is defined by @strong{MySQL}
+variable @code{ft_min_word_len}. @xref{SHOW VARIABLES}.
+Change it to the value you prefer, and rebuild
 your @code{FULLTEXT} indexes.
 
 @item
@@ -42463,6 +42488,8 @@ Responsible for @strong{MySQL} configure.
 Full-text search.
 @item
 Added keys to the @code{MERGE} library.
+@item
+@code{HANDLER} command.
 @end itemize
 
 @item Jeremy Cole
@@ -42801,6 +42828,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
 
 @itemize @bullet
 @item
+Added @code{HANDLER} command.
+@item
 Added @code{SQL_CALC_FOUND_ROWS} and @code{FOUND_ROWS()}. This make it
 possible to know how many rows a query would have returned if one hadn't
 used @code{LIMIT}.
@@ -42903,6 +42932,9 @@ not yet 100% confident in this code.
 @appendixsubsec Changes in release 3.23.37
 @itemize @bullet
 @item
+Added variables @code{ft_min_word_len}, @code{ft_max_word_len}, and
+@code{ft_max_word_len_for_sort}.
+@item
 Changed @code{INNOBASE} to @code{INNODB} (because the @code{INNOBASE}
 name was already used). Note that all @code{configure} options and
 @code{mysqld} start options are now using @code{innodb} instead of
diff --git a/include/ft_global.h b/include/ft_global.h
index bca13271f34..415c0884989 100644
--- a/include/ft_global.h
+++ b/include/ft_global.h
@@ -27,6 +27,7 @@ extern "C" {
 #endif
 
 #define FT_QUERY_MAXLEN 1024
+#define HA_FT_MAXLEN 254
 
 typedef struct ft_doc_rec {
   my_off_t  dpos;
@@ -42,6 +43,10 @@ typedef struct st_ft_doclist {
 
 extern const char *ft_precompiled_stopwords[];
 
+extern uint ft_min_word_len;
+extern uint ft_max_word_len;
+extern uint ft_max_word_len_for_sort;
+
 int ft_init_stopwords(const char **);
 void ft_free_stopwords(void);
 
diff --git a/myisam/ft_dump.c b/myisam/ft_dump.c
index af49d834d0f..62e2a67dfc2 100644
--- a/myisam/ft_dump.c
+++ b/myisam/ft_dump.c
@@ -25,7 +25,7 @@ static void complain(int val);
 static int count=0, stats=0, dump=0, verbose=0;
 static char *query=NULL;
 
-#define MAX (MAX_WORD_LEN+10)
+#define MAX (HA_FT_MAXLEN+10)
 #define HOW_OFTEN_TO_WRITE 1000
 
 int main(int argc,char *argv[])
diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c
index 09c93d7dc5b..d156c8c5556 100644
--- a/myisam/ft_parser.c
+++ b/myisam/ft_parser.c
@@ -164,7 +164,7 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param)
     if ((param->trunc=(doc<end && *doc == FTB_TRUNC)))
       doc++;
 
-    if (word->len >= MIN_WORD_LEN && word->len < MAX_WORD_LEN &&
+    if (word->len >= ft_min_word_len && word->len < ft_max_word_len &&
         !is_stopword(word->pos, word->len))
     {
       *start=doc;
@@ -195,7 +195,7 @@ byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word)
 
     word->len= (uint)(doc-word->pos) - mwc;
 
-    if (word->len >= MIN_WORD_LEN && word->len < MAX_WORD_LEN &&
+    if (word->len >= ft_min_word_len && word->len < ft_max_word_len &&
         !is_stopword(word->pos, word->len))
     {
       *start=doc;
diff --git a/myisam/ft_static.c b/myisam/ft_static.c
index 00d9d4ed19a..09afadec23f 100644
--- a/myisam/ft_static.c
+++ b/myisam/ft_static.c
@@ -18,6 +18,10 @@
 
 #include "ftdefs.h"
 
+uint ft_min_word_len=4;
+uint ft_max_word_len=HA_FT_MAXLEN;
+uint ft_max_word_len_for_sort=20;
+
 const MI_KEYSEG ft_keysegs[FT_SEGS]={
 {
     HA_KEYTYPE_VARTEXT,               /* type */
diff --git a/myisam/ft_stopwords.c b/myisam/ft_stopwords.c
index d796b87ed71..556f52650c8 100644
--- a/myisam/ft_stopwords.c
+++ b/myisam/ft_stopwords.c
@@ -48,7 +48,7 @@ int ft_init_stopwords(const char **sws)
 
   for(;*sws;sws++)
   {
-    if( (sw.len= (uint) strlen(sw.pos=*sws)) < MIN_WORD_LEN) continue;
+    if( (sw.len= (uint) strlen(sw.pos=*sws)) < ft_min_word_len) continue;
     if(!tree_insert(stopwords3, &sw, 0))
     {
       delete_tree(stopwords3); /* purecov: inspected */
diff --git a/myisam/ftdefs.h b/myisam/ftdefs.h
index b0aeb652b36..6721d136678 100644
--- a/myisam/ftdefs.h
+++ b/myisam/ftdefs.h
@@ -22,10 +22,6 @@
 #include <m_ctype.h>
 #include <my_tree.h>
 
-#define MIN_WORD_LEN 4
-#define MAX_WORD_LEN HA_FT_MAXLEN
-#define MAX_WORD_LEN_FOR_SORT 20
-
 #define HYPHEN_IS_DELIM
 #define HYPHEN_IS_CONCAT     /* not used for now */
 
diff --git a/myisam/fulltext.h b/myisam/fulltext.h
index 8fcac8172b1..f787c9bcfe8 100644
--- a/myisam/fulltext.h
+++ b/myisam/fulltext.h
@@ -24,7 +24,6 @@
 /* shoudn't be def'ed when linking with mysql */
 #undef EVAL_RUN
 
-#define HA_FT_MAXLEN 254
 #define HA_FT_WTYPE  HA_KEYTYPE_FLOAT
 #define HA_FT_WLEN   4
 #ifdef EVAL_RUN
diff --git a/myisam/mi_check.c b/myisam/mi_check.c
index d588da23d84..b19a3ffdfbc 100644
--- a/myisam/mi_check.c
+++ b/myisam/mi_check.c
@@ -1875,10 +1875,10 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
     if (sort_info->keyinfo->flag & HA_FULLTEXT)
     {
       sort_param.max_records=sort_info->max_records=
-        (ha_rows) (sort_info->filelength/MAX_WORD_LEN_FOR_SORT+1);
+        (ha_rows) (sort_info->filelength/ft_max_word_len_for_sort+1);
 
       sort_param.key_read=sort_ft_key_read;
-      sort_param.key_length+=MAX_WORD_LEN_FOR_SORT-MAX_WORD_LEN;
+      sort_param.key_length+=ft_max_word_len_for_sort-ft_max_word_len;
     }
     else
       sort_param.key_read=sort_key_read;
diff --git a/myisam/myisamchk.c b/myisam/myisamchk.c
index cbdd6a26767..cff5f781538 100644
--- a/myisam/myisamchk.c
+++ b/myisam/myisamchk.c
@@ -152,6 +152,12 @@ static CHANGEABLE_VAR changeable_vars[] = {
   { "sort_key_blocks",(long*) &check_param.sort_key_blocks,BUFFERS_WHEN_SORTING,4L,100L,0L,
     1L },
   { "decode_bits",(long*) &decode_bits,9L,4L,17L,0L,1L },
+  { "ft_min_word_len",         (long*) &ft_min_word_len,
+      4, 1, HA_FT_MAXLEN, 0, 1 },
+  { "ft_max_word_len",         (long*) &ft_max_word_len,
+      HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1 },
+  { "ft_max_word_len_for_sort",(long*) &ft_max_word_len_for_sort,
+      20, 4, HA_FT_MAXLEN, 0, 1 },
   { NullS,(long*) 0,0L,0L,0L,0L,0L,} };
 
 enum options {OPT_CHARSETS_DIR=256, OPT_SET_CHARSET,OPT_START_CHECK_POS};
diff --git a/mysql-test/r/fulltext_var.result b/mysql-test/r/fulltext_var.result
new file mode 100644
index 00000000000..0352cd7addf
--- /dev/null
+++ b/mysql-test/r/fulltext_var.result
@@ -0,0 +1,4 @@
+Variable_name	Value
+ft_min_word_len	4
+ft_max_word_len	254
+ft_max_word_len_for_sort	20
diff --git a/mysql-test/t/fulltext_var.test b/mysql-test/t/fulltext_var.test
new file mode 100644
index 00000000000..71213d1195a
--- /dev/null
+++ b/mysql-test/t/fulltext_var.test
@@ -0,0 +1,5 @@
+#
+# Fulltext configurable parameters
+#
+
+show variables like "ft\_%";
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 3bdd1ff6826..bbda3eba1bb 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -2664,6 +2664,12 @@ CHANGEABLE_VAR changeable_vars[] = {
       DELAYED_QUEUE_SIZE, 1, ~0L, 0, 1 },
   { "flush_time",              (long*) &flush_time,
       FLUSH_TIME, 0, ~0L, 0, 1 },
+  { "ft_min_word_len",         (long*) &ft_min_word_len,
+      4, 1, HA_FT_MAXLEN, 0, 1 },
+  { "ft_max_word_len",         (long*) &ft_max_word_len,
+      HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1 },
+  { "ft_max_word_len_for_sort",(long*) &ft_max_word_len_for_sort,
+      20, 4, HA_FT_MAXLEN, 0, 1 },
 #ifdef HAVE_GEMINI_DB
   { "gemini_buffer_cache",     (long*) &gemini_buffer_cache,
       128 * 8192, 16, LONG_MAX, 0, 1 },
@@ -2804,6 +2810,9 @@ struct show_var_st init_vars[]= {
   {"delayed_queue_size",      (char*) &delayed_queue_size,          SHOW_LONG},
   {"flush",                   (char*) &myisam_flush,                SHOW_MY_BOOL},
   {"flush_time",              (char*) &flush_time,                  SHOW_LONG},
+  {"ft_min_word_len",         (char*) &ft_min_word_len,             SHOW_LONG},
+  {"ft_max_word_len",         (char*) &ft_max_word_len,             SHOW_LONG},
+  {"ft_max_word_len_for_sort",(char*) &ft_max_word_len_for_sort,    SHOW_LONG},
 #ifdef HAVE_GEMINI_DB
   {"gemini_buffer_cache",     (char*) &gemini_buffer_cache,         SHOW_LONG},
   {"gemini_connection_limit", (char*) &gemini_connection_limit,     SHOW_LONG},
author	unknown <serg@serg.mysql.com>	2001-04-17 21:30:02 +0200
committer	unknown <serg@serg.mysql.com>	2001-04-17 21:30:02 +0200
commit	17ef8ef4443598265bca23d86d04fcc275da7804 (patch)
tree	6c294f797e941fcb6156771d2a0f0dd4393d06bb
parent	09f0384e7abce9d9cb744d0a20b897b5a77ef747 (diff)
download	mariadb-git-17ef8ef4443598265bca23d86d04fcc275da7804.tar.gz