summaryrefslogtreecommitdiff
path: root/myisam/ftdefs.h
diff options
context:
space:
mode:
Diffstat (limited to 'myisam/ftdefs.h')
-rw-r--r--myisam/ftdefs.h79
1 files changed, 69 insertions, 10 deletions
diff --git a/myisam/ftdefs.h b/myisam/ftdefs.h
index d9b4ff6b44d..62fa4362e19 100644
--- a/myisam/ftdefs.h
+++ b/myisam/ftdefs.h
@@ -1,15 +1,15 @@
/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
-
+
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
@@ -22,15 +22,18 @@
#include <m_ctype.h>
#include <my_tree.h>
-#define MIN_WORD_LEN 4
-
#define HYPHEN_IS_DELIM /* NOTE(review): presumably makes '-' act as a word delimiter - confirm where this flag is tested */
#define HYPHEN_IS_CONCAT /* not used for now */
#define COMPILE_STOPWORDS_IN /* NOTE(review): presumably selects a compiled-in stopword list - confirm where this flag is tested */
-/* Most of the formulae were shamelessly stolen from SMART distribution
- ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z
+/* Interested readers may consult SMART
+ (ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z)
+ for an excellent implementation of the vector space model we use.
+ It also demonstrates the usage of different weighting techniques.
+ This code, though, is completely original and is not based on the
+ SMART code but was in some cases inspired by it.
+
NORM_PIVOT was taken from the article
A.Singhal, C.Buckley, M.Mitra, "Pivoted Document Length Normalization",
ACM SIGIR'96, 21-29, 1996
@@ -75,13 +78,26 @@ extern ulong collstat;
/* The GWS_* macros expand in place and use aio, doc_cnt, sum and suml from the scope where they appear. Mysterious, but w/o (double) GWS_IDF performs better :-o */
#define GWS_IDF log(aio->info->state->records/doc_cnt)
#define GWS_IDF1 log((double)aio->info->state->records/doc_cnt) /* same as GWS_IDF but with records cast to double before the division */
-#define GWS_PROB log(((double)(aio->info->state->records-doc_cnt))/doc_cnt)
+#define GWS_PROB ((aio->info->state->records > doc_cnt) ? log(((double)(aio->info->state->records-doc_cnt))/doc_cnt) : 0 ) /* evaluates to 0 unless records > doc_cnt, so log() is reached only when records-doc_cnt is positive */
#define GWS_FREQ (1.0/doc_cnt)
#define GWS_SQUARED pow(log((double)aio->info->state->records/doc_cnt),2) /* the GWS_IDF1 expression raised to the power 2 */
#define GWS_CUBIC pow(log((double)aio->info->state->records/doc_cnt),3) /* the GWS_IDF1 expression raised to the power 3 */
#define GWS_ENTROPY (1-(suml/sum-log(sum))/log(aio->info->state->records))
/*=================================================================*/
+/* Boolean search operators: each FTB_* macro reads one element of ft_boolean_syntax at a fixed index. NOTE(review): index 9 has no macro here (FTB_TRUNC is 8, FTB_LQUOT is 10) - confirm the gap is intentional. */
+#define FTB_YES (ft_boolean_syntax[0])
+#define FTB_EGAL (ft_boolean_syntax[1])
+#define FTB_NO (ft_boolean_syntax[2])
+#define FTB_INC (ft_boolean_syntax[3])
+#define FTB_DEC (ft_boolean_syntax[4])
+#define FTB_LBR (ft_boolean_syntax[5])
+#define FTB_RBR (ft_boolean_syntax[6])
+#define FTB_NEG (ft_boolean_syntax[7])
+#define FTB_TRUNC (ft_boolean_syntax[8])
+#define FTB_LQUOT (ft_boolean_syntax[10])
+#define FTB_RQUOT (ft_boolean_syntax[11])
+
typedef struct st_ft_word {
byte * pos;
uint len;
@@ -91,9 +107,52 @@ typedef struct st_ft_word {
#endif /* EVAL_RUN */
} FT_WORD;
+typedef struct st_ftb_param { /* passed by pointer as the last argument of ft_get_word(); NOTE(review): the field notes below are inferred from names - confirm against ft_get_word() */
+ byte prev; /* presumably the previously seen character - TODO confirm */
+ int yesno; /* presumably tracks the FTB_YES / FTB_NO operators - TODO confirm */
+ int plusminus; /* presumably tracks the FTB_INC / FTB_DEC operators - TODO confirm */
+ bool pmsign; /* presumably a sign flag paired with plusminus - TODO confirm */
+ bool trunc; /* presumably set when FTB_TRUNC was seen - TODO confirm */
+ byte *quot; /* presumably related to FTB_LQUOT / FTB_RQUOT handling - TODO confirm */
+} FTB_PARAM;
+
int is_stopword(char *word, uint len);
uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t);
-TREE * ft_parse(TREE *, byte *, int);
-FT_WORD * ft_linearize(MI_INFO *, uint, byte *, TREE *);
+byte ft_get_word(byte **, byte *, FT_WORD *, FTB_PARAM *); /* takes the same first three parameter types as ft_simple_get_word() plus an FTB_PARAM pointer */
+byte ft_simple_get_word(byte **, byte *, FT_WORD *); /* variant without the FTB_PARAM argument */
+
+typedef struct _st_ft_seg_iterator { /* state object taken by pointer by the _mi_ft_segiterator*() functions declared in this header */
+ uint num, len; /* NOTE(review): presumably a segment counter and the current segment length - confirm */
+ MI_KEYSEG *seg; /* NOTE(review): presumably the current key segment - confirm */
+ const byte *rec, *pos; /* read-only byte pointers; NOTE(review): presumably the record and the current position in it - confirm */
+} FT_SEG_ITERATOR;
+
+void _mi_ft_segiterator_init(MI_INFO *, uint, const byte *, FT_SEG_ITERATOR *); /* fills the FT_SEG_ITERATOR given an MI_INFO, a uint and a const byte buffer; NOTE(review): the uint is presumably a key number - confirm */
+void _mi_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *); /* initialiser that takes only a const byte buffer and a uint, no MI_INFO */
+uint _mi_ft_segiterator(FT_SEG_ITERATOR *); /* NOTE(review): presumably advances the iterator; meaning of the uint result not visible here - confirm */
+
+void ft_parse_init(TREE *, CHARSET_INFO *); /* NOTE(review): presumably must be called on the TREE before ft_parse() - confirm */
+int ft_parse(TREE *, byte *, int); /* NOTE(review): meaning of the int result is not visible in this header - confirm */
+FT_WORD * ft_linearize(TREE *); /* takes a TREE and returns a pointer to FT_WORD; NOTE(review): ownership of the result is not visible here - confirm */
+FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, byte *, const byte *);
+uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record); /* the only prototype in this group that names its parameters */
+
+extern const struct _ft_vft _ft_vft_nlq; /* declaration only; the ft_nlq_* prototypes that follow have the same signatures as the ft_boolean_* set in this header */
+FT_INFO *ft_init_nlq_search(MI_INFO *, uint, byte *, uint, my_bool); /* returns the FT_INFO handle that the other ft_nlq_* functions take */
+int ft_nlq_read_next(FT_INFO *, char *);
+float ft_nlq_find_relevance(FT_INFO *, byte *, uint);
+void ft_nlq_close_search(FT_INFO *);
+float ft_nlq_get_relevance(FT_INFO *);
+my_off_t ft_nlq_get_docid(FT_INFO *);
+void ft_nlq_reinit_search(FT_INFO *);
+
+extern const struct _ft_vft _ft_vft_boolean; /* declaration only; the ft_boolean_* prototypes that follow have the same signatures as the ft_nlq_* set in this header */
+FT_INFO *ft_init_boolean_search(MI_INFO *, uint, byte *, uint, my_bool); /* returns the FT_INFO handle that the other ft_boolean_* functions take */
+int ft_boolean_read_next(FT_INFO *, char *);
+float ft_boolean_find_relevance(FT_INFO *, byte *, uint);
+void ft_boolean_close_search(FT_INFO *);
+float ft_boolean_get_relevance(FT_INFO *);
+my_off_t ft_boolean_get_docid(FT_INFO *);
+void ft_boolean_reinit_search(FT_INFO *);