summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author unknown <serg@serg.mysql.com> 2003-01-23 13:20:37 +0100
committer unknown <serg@serg.mysql.com> 2003-01-23 13:20:37 +0100
commit fc09f13c5db5f6c8abab4817d81f6a2b0cb7666a (patch)
tree 3db355daba734e755d05c110ab5412717c6c7408
parent 30a35bcfe456cc118381e5475c90dda32e414d72 (diff)
download mariadb-git-fc09f13c5db5f6c8abab4817d81f6a2b0cb7666a.tar.gz
--ft_stopword_file command-line option
-rw-r--r-- include/ft_global.h 3
-rw-r--r-- myisam/ft_static.c 1
-rw-r--r-- myisam/ft_stopwords.c 75
-rw-r--r-- myisam/myisamchk.c 2
-rw-r--r-- sql/mysqld.cc 19
5 files changed, 77 insertions, 23 deletions
diff --git a/include/ft_global.h b/include/ft_global.h
index 449cc57729f..9acdf6aaaf3 100644
--- a/include/ft_global.h
+++ b/include/ft_global.h
@@ -46,6 +46,7 @@ struct st_ft_info
};
#endif
+extern const char *ft_stopword_file;
extern const char *ft_precompiled_stopwords[];
extern ulong ft_min_word_len;
@@ -53,7 +54,7 @@ extern ulong ft_max_word_len;
extern ulong ft_max_word_len_for_sort;
extern const char *ft_boolean_syntax;
-int ft_init_stopwords(const char **);
+int ft_init_stopwords(void);
void ft_free_stopwords(void);
#define FT_NL 0
diff --git a/myisam/ft_static.c b/myisam/ft_static.c
index e2a4fd8c0b1..7f78a11bb2f 100644
--- a/myisam/ft_static.c
+++ b/myisam/ft_static.c
@@ -67,6 +67,7 @@ FT_INFO *ft_init_search(uint mode, void *info, uint keynr,
query, query_len, presort);
}
+const char *ft_stopword_file = 0;
const char *ft_precompiled_stopwords[] = {
#ifdef COMPILE_STOPWORDS_IN
diff --git a/myisam/ft_stopwords.c b/myisam/ft_stopwords.c
index 9c2047c3b56..30a1f6e5dc8 100644
--- a/myisam/ft_stopwords.c
+++ b/myisam/ft_stopwords.c
@@ -33,27 +33,74 @@ static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)),
(uchar *)w2->pos,w2->len,0);
}
-int ft_init_stopwords(const char **sws)
+static void FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action,
+ void *arg __attribute__((unused)))
{
- FT_STOPWORD sw;
+ if (action == free_free && ft_stopword_file)
+ my_free(w->pos, MYF(0));
+}
+static int ft_add_stopword(const char *w)
+{
+ FT_STOPWORD sw;
+ return !w ||
+ (((sw.len= (uint) strlen(sw.pos=w)) >= ft_min_word_len) &&
+ (tree_insert(stopwords3, &sw, 0)==NULL));
+}
- if(!stopwords3)
+int ft_init_stopwords()
+{
+ if (!stopwords3)
{
- if(!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) return -1;
- init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,0,
- NULL, NULL);
+ if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0))))
+ return -1;
+ init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,
+ 0, (tree_element_free)&FT_STOPWORD_free, NULL);
}
- if(!sws) return 0;
-
- for(;*sws;sws++)
+ if (ft_stopword_file)
{
- if( (sw.len= (uint) strlen(sw.pos=*sws)) < ft_min_word_len) continue;
- if(!tree_insert(stopwords3, &sw, 0))
+ File fd;
+ my_off_t len;
+ byte *buffer, *start, *end;
+ FT_WORD w;
+ int err=-1;
+
+ if (!*ft_stopword_file)
+ return 0;
+
+ if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1)
+ return -1;
+ len=my_seek(fd, 0L, MY_SEEK_END, MYF(0));
+ my_seek(fd, 0L, MY_SEEK_SET, MYF(0));
+ if (!(start=buffer=my_malloc(len+1, MYF(MY_WME))))
+ {
+ my_close(fd, MYF(MY_WME));
+ return -1;
+ }
+ len=my_read(fd, buffer, len, MYF(MY_WME));
+ end=start+len;
+ while (ft_simple_get_word(&start, end, &w))
+ {
+ if (ft_add_stopword(my_strdup_with_length(w.pos, w.len, MYF(0))))
+ goto err1;
+ }
+ err=0;
+err1:
+ my_free(buffer, MYF(0));
+err0:
+ my_close(fd, MYF(MY_WME));
+ return err;
+ }
+ else
+ {
+ /* compatibility mode: to be removed */
+ char **sws=ft_precompiled_stopwords;
+
+ for (;*sws;sws++)
{
- delete_tree(stopwords3); /* purecov: inspected */
- return -1; /* purecov: inspected */
+ if (ft_add_stopword(*sws))
+ return -1;
}
}
return 0;
@@ -72,7 +119,7 @@ void ft_free_stopwords()
{
if (stopwords3)
{
- delete_tree(stopwords3); /* purecov: inspected */
+ delete_tree(stopwords3); /* purecov: inspected */
my_free((char*) stopwords3,MYF(0));
stopwords3=0;
}
diff --git a/myisam/myisamchk.c b/myisam/myisamchk.c
index 7fa91253850..f5bae72396a 100644
--- a/myisam/myisamchk.c
+++ b/myisam/myisamchk.c
@@ -860,7 +860,7 @@ static int myisamchk(MI_CHECK *param, my_string filename)
else
{
if (share->fulltext_index)
- ft_init_stopwords(ft_precompiled_stopwords); /* SerG */
+ ft_init_stopwords();
if (!(param->testflag & T_READONLY))
lock_type = F_WRLCK; /* table is changed */
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 46fbee0a7ea..5ddeb642340 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -2072,8 +2072,8 @@ int main(int argc, char **argv)
#endif
if (opt_myisam_log)
- (void) mi_log( 1 );
- ft_init_stopwords(ft_precompiled_stopwords);
+ (void) mi_log(1);
+ ft_init_stopwords();
#ifdef __WIN__
if (!opt_console)
@@ -2929,7 +2929,7 @@ enum options {
OPT_CONNECT_TIMEOUT, OPT_DELAYED_INSERT_TIMEOUT,
OPT_DELAYED_INSERT_LIMIT, OPT_DELAYED_QUEUE_SIZE,
OPT_FLUSH_TIME, OPT_FT_MIN_WORD_LEN,
- OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT,
+ OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT, OPT_FT_STOPWORD_FILE,
OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE,
OPT_KEY_BUFFER_SIZE, OPT_LONG_QUERY_TIME,
OPT_LOWER_CASE_TABLE_NAMES, OPT_MAX_ALLOWED_PACKET,
@@ -3415,7 +3415,8 @@ struct my_option my_long_options[] =
(gptr*) &max_system_variables.log_warnings, 0, GET_BOOL, NO_ARG, 0, 0, 0,
0, 0, 0},
{ "back_log", OPT_BACK_LOG,
- "The number of outstanding connection requests MySQL can have. This comes into play when the main MySQL thread gets very many connection requests in a very short time.", (gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG,
+ "The number of outstanding connection requests MySQL can have. This comes into play when the main MySQL thread gets very many connection requests in a very short time.",
+ (gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG,
REQUIRED_ARG, 50, 1, 65535, 0, 1, 0 },
#ifdef HAVE_BERKELEY_DB
{ "bdb_cache_size", OPT_BDB_CACHE_SIZE,
@@ -3468,9 +3469,13 @@ struct my_option my_long_options[] =
(gptr*) &ft_max_word_len, (gptr*) &ft_max_word_len, 0, GET_ULONG,
REQUIRED_ARG, HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1, 0},
{ "ft_max_word_len_for_sort", OPT_FT_MAX_WORD_LEN_FOR_SORT,
- "Undocumented", (gptr*) &ft_max_word_len_for_sort,
- (gptr*) &ft_max_word_len_for_sort, 0, GET_ULONG, REQUIRED_ARG, 20, 4,
- HA_FT_MAXLEN, 0, 1, 0},
+ "The maximum length of the word for repair_by_sorting. Longer words are included the slow way. The lower this value, the more words will be put in one sort bucket.",
+ (gptr*) &ft_max_word_len_for_sort, (gptr*) &ft_max_word_len_for_sort, 0, GET_ULONG,
+ REQUIRED_ARG, 20, 4, HA_FT_MAXLEN, 0, 1, 0},
+ { "ft_stopword_file", OPT_FT_STOPWORD_FILE,
+ "Use stopwords from this file instead of built-in list.",
+ (gptr*) &ft_stopword_file, (gptr*) &ft_stopword_file, 0, GET_STR,
+ REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
#ifdef HAVE_INNOBASE_DB
{"innodb_mirrored_log_groups", OPT_INNODB_MIRRORED_LOG_GROUPS,
"Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",