summary | refs | log | tree | commit | diff
path: root/myisam/ft_stopwords.c
diff options
context:
space:
mode:
author: unknown <serg@serg.mysql.com> 2003-01-23 13:20:37 +0100
committer: unknown <serg@serg.mysql.com> 2003-01-23 13:20:37 +0100
commit: fc09f13c5db5f6c8abab4817d81f6a2b0cb7666a (patch)
tree: 3db355daba734e755d05c110ab5412717c6c7408 /myisam/ft_stopwords.c
parent: 30a35bcfe456cc118381e5475c90dda32e414d72 (diff)
download: mariadb-git-fc09f13c5db5f6c8abab4817d81f6a2b0cb7666a.tar.gz
--ft_stopword_file command-line option
Diffstat (limited to 'myisam/ft_stopwords.c')
-rw-r--r-- myisam/ft_stopwords.c | 75
1 files changed, 61 insertions, 14 deletions
diff --git a/myisam/ft_stopwords.c b/myisam/ft_stopwords.c
index 9c2047c3b56..30a1f6e5dc8 100644
--- a/myisam/ft_stopwords.c
+++ b/myisam/ft_stopwords.c
@@ -33,27 +33,74 @@ static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)),
(uchar *)w2->pos,w2->len,0);
}
-int ft_init_stopwords(const char **sws)
+static void FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action,
+ void *arg __attribute__((unused)))
{
- FT_STOPWORD sw;
+ if (action == free_free && ft_stopword_file)
+ my_free(w->pos, MYF(0));
+}
+static int ft_add_stopword(const char *w)
+{
+ FT_STOPWORD sw;
+ return !w ||
+ (((sw.len= (uint) strlen(sw.pos=w)) >= ft_min_word_len) &&
+ (tree_insert(stopwords3, &sw, 0)==NULL));
+}
- if(!stopwords3)
+int ft_init_stopwords()
+{
+ if (!stopwords3)
{
- if(!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) return -1;
- init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,0,
- NULL, NULL);
+ if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0))))
+ return -1;
+ init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,
+ 0, (tree_element_free)&FT_STOPWORD_free, NULL);
}
- if(!sws) return 0;
-
- for(;*sws;sws++)
+ if (ft_stopword_file)
{
- if( (sw.len= (uint) strlen(sw.pos=*sws)) < ft_min_word_len) continue;
- if(!tree_insert(stopwords3, &sw, 0))
+ File fd;
+ my_off_t len;
+ byte *buffer, *start, *end;
+ FT_WORD w;
+ int err=-1;
+
+ if (!*ft_stopword_file)
+ return 0;
+
+ if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1)
+ return -1;
+ len=my_seek(fd, 0L, MY_SEEK_END, MYF(0));
+ my_seek(fd, 0L, MY_SEEK_SET, MYF(0));
+ if (!(start=buffer=my_malloc(len+1, MYF(MY_WME))))
+ {
+ my_close(fd, MYF(MY_WME));
+ return -1;
+ }
+ len=my_read(fd, buffer, len, MYF(MY_WME));
+ end=start+len;
+ while (ft_simple_get_word(&start, end, &w))
+ {
+ if (ft_add_stopword(my_strdup_with_length(w.pos, w.len, MYF(0))))
+ goto err1;
+ }
+ err=0;
+err1:
+ my_free(buffer, MYF(0));
+err0:
+ my_close(fd, MYF(MY_WME));
+ return err;
+ }
+ else
+ {
+ /* compatibility mode: to be removed */
+ char **sws=ft_precompiled_stopwords;
+
+ for (;*sws;sws++)
{
- delete_tree(stopwords3); /* purecov: inspected */
- return -1; /* purecov: inspected */
+ if (ft_add_stopword(*sws))
+ return -1;
}
}
return 0;
@@ -72,7 +119,7 @@ void ft_free_stopwords()
{
if (stopwords3)
{
- delete_tree(stopwords3); /* purecov: inspected */
+ delete_tree(stopwords3); /* purecov: inspected */
my_free((char*) stopwords3,MYF(0));
stopwords3=0;
}