diff options
author | unknown <serg@serg.mysql.com> | 2003-01-23 13:20:37 +0100 |
---|---|---|
committer | unknown <serg@serg.mysql.com> | 2003-01-23 13:20:37 +0100 |
commit | fc09f13c5db5f6c8abab4817d81f6a2b0cb7666a (patch) | |
tree | 3db355daba734e755d05c110ab5412717c6c7408 | |
parent | 30a35bcfe456cc118381e5475c90dda32e414d72 (diff) | |
download | mariadb-git-fc09f13c5db5f6c8abab4817d81f6a2b0cb7666a.tar.gz |
--ft_stopword_file command-line option
-rw-r--r-- | include/ft_global.h | 3 |
-rw-r--r-- | myisam/ft_static.c | 1 |
-rw-r--r-- | myisam/ft_stopwords.c | 75 |
-rw-r--r-- | myisam/myisamchk.c | 2 |
-rw-r--r-- | sql/mysqld.cc | 19 |
5 files changed, 77 insertions, 23 deletions
diff --git a/include/ft_global.h b/include/ft_global.h index 449cc57729f..9acdf6aaaf3 100644 --- a/include/ft_global.h +++ b/include/ft_global.h @@ -46,6 +46,7 @@ struct st_ft_info }; #endif +extern const char *ft_stopword_file; extern const char *ft_precompiled_stopwords[]; extern ulong ft_min_word_len; @@ -53,7 +54,7 @@ extern ulong ft_max_word_len; extern ulong ft_max_word_len_for_sort; extern const char *ft_boolean_syntax; -int ft_init_stopwords(const char **); +int ft_init_stopwords(void); void ft_free_stopwords(void); #define FT_NL 0 diff --git a/myisam/ft_static.c b/myisam/ft_static.c index e2a4fd8c0b1..7f78a11bb2f 100644 --- a/myisam/ft_static.c +++ b/myisam/ft_static.c @@ -67,6 +67,7 @@ FT_INFO *ft_init_search(uint mode, void *info, uint keynr, query, query_len, presort); } +const char *ft_stopword_file = 0; const char *ft_precompiled_stopwords[] = { #ifdef COMPILE_STOPWORDS_IN diff --git a/myisam/ft_stopwords.c b/myisam/ft_stopwords.c index 9c2047c3b56..30a1f6e5dc8 100644 --- a/myisam/ft_stopwords.c +++ b/myisam/ft_stopwords.c @@ -33,27 +33,74 @@ static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)), (uchar *)w2->pos,w2->len,0); } -int ft_init_stopwords(const char **sws) +static void FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action, + void *arg __attribute__((unused))) { - FT_STOPWORD sw; + if (action == free_free && ft_stopword_file) + my_free(w->pos, MYF(0)); +} +static int ft_add_stopword(const char *w) +{ + FT_STOPWORD sw; + return !w || + (((sw.len= (uint) strlen(sw.pos=w)) >= ft_min_word_len) && + (tree_insert(stopwords3, &sw, 0)==NULL)); +} - if(!stopwords3) +int ft_init_stopwords() +{ + if (!stopwords3) { - if(!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) return -1; - init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,0, - NULL, NULL); + if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) + return -1; + init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp, + 0, 
(tree_element_free)&FT_STOPWORD_free, NULL); } - if(!sws) return 0; - - for(;*sws;sws++) + if (ft_stopword_file) { - if( (sw.len= (uint) strlen(sw.pos=*sws)) < ft_min_word_len) continue; - if(!tree_insert(stopwords3, &sw, 0)) + File fd; + my_off_t len; + byte *buffer, *start, *end; + FT_WORD w; + int err=-1; + + if (!*ft_stopword_file) + return 0; + + if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1) + return -1; + len=my_seek(fd, 0L, MY_SEEK_END, MYF(0)); + my_seek(fd, 0L, MY_SEEK_SET, MYF(0)); + if (!(start=buffer=my_malloc(len+1, MYF(MY_WME)))) + { + my_close(fd, MYF(MY_WME)); + return -1; + } + len=my_read(fd, buffer, len, MYF(MY_WME)); + end=start+len; + while (ft_simple_get_word(&start, end, &w)) + { + if (ft_add_stopword(my_strdup_with_length(w.pos, w.len, MYF(0)))) + goto err1; + } + err=0; +err1: + my_free(buffer, MYF(0)); +err0: + my_close(fd, MYF(MY_WME)); + return err; + } + else + { + /* compatibility mode: to be removed */ + char **sws=ft_precompiled_stopwords; + + for (;*sws;sws++) { - delete_tree(stopwords3); /* purecov: inspected */ - return -1; /* purecov: inspected */ + if (ft_add_stopword(*sws)) + return -1; } } return 0; @@ -72,7 +119,7 @@ void ft_free_stopwords() { if (stopwords3) { - delete_tree(stopwords3); /* purecov: inspected */ + delete_tree(stopwords3); /* purecov: inspected */ my_free((char*) stopwords3,MYF(0)); stopwords3=0; } diff --git a/myisam/myisamchk.c b/myisam/myisamchk.c index 7fa91253850..f5bae72396a 100644 --- a/myisam/myisamchk.c +++ b/myisam/myisamchk.c @@ -860,7 +860,7 @@ static int myisamchk(MI_CHECK *param, my_string filename) else { if (share->fulltext_index) - ft_init_stopwords(ft_precompiled_stopwords); /* SerG */ + ft_init_stopwords(); if (!(param->testflag & T_READONLY)) lock_type = F_WRLCK; /* table is changed */ diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 46fbee0a7ea..5ddeb642340 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -2072,8 +2072,8 @@ int main(int argc, char **argv) #endif if 
(opt_myisam_log) - (void) mi_log( 1 ); - ft_init_stopwords(ft_precompiled_stopwords); + (void) mi_log(1); + ft_init_stopwords(); #ifdef __WIN__ if (!opt_console) @@ -2929,7 +2929,7 @@ enum options { OPT_CONNECT_TIMEOUT, OPT_DELAYED_INSERT_TIMEOUT, OPT_DELAYED_INSERT_LIMIT, OPT_DELAYED_QUEUE_SIZE, OPT_FLUSH_TIME, OPT_FT_MIN_WORD_LEN, - OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT, + OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT, OPT_FT_STOPWORD_FILE, OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE, OPT_KEY_BUFFER_SIZE, OPT_LONG_QUERY_TIME, OPT_LOWER_CASE_TABLE_NAMES, OPT_MAX_ALLOWED_PACKET, @@ -3415,7 +3415,8 @@ struct my_option my_long_options[] = (gptr*) &max_system_variables.log_warnings, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, { "back_log", OPT_BACK_LOG, - "The number of outstanding connection requests MySQL can have. This comes into play when the main MySQL thread gets very many connection requests in a very short time.", (gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG, + "The number of outstanding connection requests MySQL can have. This comes into play when the main MySQL thread gets very many connection requests in a very short time.", + (gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG, REQUIRED_ARG, 50, 1, 65535, 0, 1, 0 }, #ifdef HAVE_BERKELEY_DB { "bdb_cache_size", OPT_BDB_CACHE_SIZE, @@ -3468,9 +3469,13 @@ struct my_option my_long_options[] = (gptr*) &ft_max_word_len, (gptr*) &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1, 0}, { "ft_max_word_len_for_sort", OPT_FT_MAX_WORD_LEN_FOR_SORT, - "Undocumented", (gptr*) &ft_max_word_len_for_sort, - (gptr*) &ft_max_word_len_for_sort, 0, GET_ULONG, REQUIRED_ARG, 20, 4, - HA_FT_MAXLEN, 0, 1, 0}, + "The maximum length of the word for repair_by_sorting. Longer words are included the slow way. 
The lower this value, the more words will be put in one sort bucket.", + (gptr*) &ft_max_word_len_for_sort, (gptr*) &ft_max_word_len_for_sort, 0, GET_ULONG, + REQUIRED_ARG, 20, 4, HA_FT_MAXLEN, 0, 1, 0}, + { "ft_stopword_file", OPT_FT_STOPWORD_FILE, + "Use stopwords from this file instead of built-in list.", + (gptr*) &ft_stopword_file, (gptr*) &ft_stopword_file, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, #ifdef HAVE_INNOBASE_DB {"innodb_mirrored_log_groups", OPT_INNODB_MIRRORED_LOG_GROUPS, "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.", |