summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorunknown <svoj@mysql.com>2005-12-28 16:05:30 +0400
committerunknown <svoj@mysql.com>2005-12-28 16:05:30 +0400
commit38005eae6ac73bb2a58287b6eb9da14816e8424b (patch)
tree83cc0fcb79bcf2f4024c799124de23490f67d89d /include
parent5bfbfb24e5e5467bea919ddf5bf0406308e01a15 (diff)
downloadmariadb-git-38005eae6ac73bb2a58287b6eb9da14816e8424b.tar.gz
WL#2575 - Fulltext: Parser plugin for FTS
Manual merge. Makefile.am: Added new 'plugin' subdir. configure.in: Added plugin related makefiles. include/my_base.h: Added HA_OPEN_FROM_SQL_LAYER flag - indicates that a table was opened from the sql layer. Added HA_OPTION_RELIES_ON_SQL_LAYER flag - indicates that a table relies on the sql layer. Added HA_CREATE_RELIES_ON_SQL_LAYER flag - indicates that a table must be created with HA_OPTION_RELIES_ON_SQL_LAYER flag. include/myisam.h: Distinct fulltext parser number added. include/plugin.h: Revise comment. sql/ha_myisam.cc: Pass HA_OPEN_FROM_SQL_LAYER flag to mi_open(). Pass HA_CREATE_RELIES_ON_SQL_LAYER flag to mi_create(). sql/sql_plugin.cc: Reuse "unused" dynamic array elements. A check for plugin info interface version. sql/sql_plugin.h: Added plugin_type_names[] - string plugin type names. sql/sql_show.cc: Use plugin_type_names array instead of switch to find literal parser name representation. sql/sql_table.cc: Fixed that ALTER TABLE ... ADD INDEX loses WITH PARSER info. storage/myisam/ft_boolean_search.c: Call fulltext parser init() function, pass MYSQL_FTPARSER_PARAM, returned by ftparser_call_initializer(), to parser->parse(). storage/myisam/ft_nlq_search.c: Call fulltext parser init() function, pass MYSQL_FTPARSER_PARAM, returned by ftparser_call_initializer(), to parser->parse(). storage/myisam/ft_parser.c: Added two functions: ftparser_call_initializer() - calls parser->init() function if specified and parser is not yet initialized. Returns MYSQL_FTPARSER_PARAM *. ftparser_call_deinitializer() - calls parser->deinit() function if specified and parser was initialized. Deinitializes all parsers. ft_parse() accepts additional param now - MYSQL_FTPARSER_PARAM and passes it to parser->parse(). storage/myisam/ft_update.c: Call fulltext parser init() function, pass MYSQL_FTPARSER_PARAM, returned by ftparser_call_initializer(), to _mi_ft_parse(). _mi_ft_parse() accepts additional param now - MYSQL_FTPARSER_PARAM and passes it to parser->parse(). 
storage/myisam/ftdefs.h: Prototypes for new functions were added. MYSQL_FTPARSER_PARAM was added to ft_parse and _mi_ft_parse(). storage/myisam/mi_close.c: Free ftparser_param allocated by ftparser_call_initializer(). storage/myisam/mi_create.c: If a table relies on the sql layer, set HA_OPTION_RELIES_ON_SQL_LAYER. storage/myisam/mi_locking.c: Call deinitializer for each initialized parser. storage/myisam/mi_open.c: Set default values for share->ftparser and keydef->ftparser_nr. If a table is opened from the non-sql layer and HA_OPTION_RELIES_ON_SQL_LAYER is set, raise HA_ERR_UNSUPPORTED error. storage/myisam/myisamdef.h: Added number of distinct parsers to MYISAM_SHARE. Added ftparser_param to MI_INFO. plugin/Makefile.am: New BitKeeper file ``plugin/Makefile.am'' plugin/fulltext/Makefile.am: New BitKeeper file ``plugin/fulltext/Makefile.am'' plugin/fulltext/plugin_example.c: New BitKeeper file ``plugin/fulltext/plugin_example.c''
Diffstat (limited to 'include')
-rw-r--r--include/my_base.h3
-rw-r--r--include/myisam.h1
-rw-r--r--include/plugin.h136
3 files changed, 96 insertions, 44 deletions
diff --git a/include/my_base.h b/include/my_base.h
index 4e1a573217b..9b53ebffeb4 100644
--- a/include/my_base.h
+++ b/include/my_base.h
@@ -51,6 +51,7 @@
#define HA_OPEN_DELAY_KEY_WRITE 8 /* Don't update index */
#define HA_OPEN_ABORT_IF_CRASHED 16
#define HA_OPEN_FOR_REPAIR 32 /* open even if crashed */
+#define HA_OPEN_FROM_SQL_LAYER 64
/* The following is parameter to ha_rkey() how to use key */
@@ -246,6 +247,7 @@ enum ha_base_keytype {
#define HA_OPTION_DELAY_KEY_WRITE 64
#define HA_OPTION_NO_PACK_KEYS 128 /* Reserved for MySQL */
#define HA_OPTION_CREATE_FROM_ENGINE 256
+#define HA_OPTION_RELIES_ON_SQL_LAYER 512
#define HA_OPTION_TEMP_COMPRESS_RECORD ((uint) 16384) /* set by isamchk */
#define HA_OPTION_READ_ONLY_DATA ((uint) 32768) /* Set by isamchk */
@@ -256,6 +258,7 @@ enum ha_base_keytype {
#define HA_CREATE_TMP_TABLE 4
#define HA_CREATE_CHECKSUM 8
#define HA_CREATE_DELAY_KEY_WRITE 64
+#define HA_CREATE_RELIES_ON_SQL_LAYER 128
/*
The following flags (OR-ed) are passed to handler::info() method.
diff --git a/include/myisam.h b/include/myisam.h
index 19458e52f61..4d55409b8b3 100644
--- a/include/myisam.h
+++ b/include/myisam.h
@@ -198,6 +198,7 @@ typedef struct st_mi_keydef /* Key definition with open & info */
uint16 maxlength; /* max length of (packed) key (auto) */
uint16 block_size; /* block_size (auto) */
uint32 version; /* For concurrent read/write */
+ uint32 ftparser_nr; /* distinct ftparser number */
HA_KEYSEG *seg,*end;
struct st_mysql_ftparser *parser; /* Fulltext [pre]parser */
diff --git a/include/plugin.h b/include/plugin.h
index 4486a719d1d..029d7a611fb 100644
--- a/include/plugin.h
+++ b/include/plugin.h
@@ -66,36 +66,50 @@ struct st_mysql_plugin
/* Parsing modes. Set in MYSQL_FTPARSER_PARAM::mode */
/*
- The fast and simple mode. Parser is expected to return only those words that
- go into the index. Stopwords or too short/long words should not be returned.
- 'boolean_info' argument of mysql_add_word() does not have to be set.
+ Fast and simple mode. This mode is used for indexing, and natural
+ language queries.
- This mode is used for indexing, and natural language queries.
+ The parser is expected to return only those words that go into the
+ index. Stopwords or too short/long words should not be returned. The
+ 'boolean_info' argument of mysql_add_word() does not have to be set.
*/
#define MYSQL_FTPARSER_SIMPLE_MODE 0
/*
- The parser is not allowed to ignore words in this mode. Every word should
- be returned, including stopwords and words that are too short or long.
- 'boolean_info' argument of mysql_add_word() does not have to be set.
+ Parse with stopwords mode. This mode is used in boolean searches for
+ "phrase matching."
- This mode is used in boolean searches for "phrase matching."
+ The parser is not allowed to ignore words in this mode. Every word
+ should be returned, including stopwords and words that are too short
+ or long. The 'boolean_info' argument of mysql_add_word() does not
+ have to be set.
*/
#define MYSQL_FTPARSER_WITH_STOPWORDS 1
/*
- Parse in boolean mode. The parser should provide a valid
- MYSQL_FTPARSER_BOOLEAN_INFO structure in the 'boolean_info' argument
- to mysql_add_word(). Usually that means that the parser should
- recognize boolean operators in the parsing stream and set appropriate
- fields in MYSQL_FTPARSER_BOOLEAN_INFO structure accordingly. As
- for MYSQL_FTPARSER_WITH_STOPWORDS mode, no word should be ignored.
+ Parse in boolean mode. This mode is used to parse a boolean query string.
+
+ The parser should provide a valid MYSQL_FTPARSER_BOOLEAN_INFO
+ structure in the 'boolean_info' argument to mysql_add_word().
+ Usually that means that the parser should recognize boolean operators
+ in the parsing stream and set appropriate fields in
+ MYSQL_FTPARSER_BOOLEAN_INFO structure accordingly. As for
+ MYSQL_FTPARSER_WITH_STOPWORDS mode, no word should be ignored.
Instead, use FT_TOKEN_STOPWORD for the token type of such a word.
-
- This mode is used to parse a boolean query string.
*/
#define MYSQL_FTPARSER_FULL_BOOLEAN_INFO 2
+/*
+ Token types for boolean mode searching (used for the type member of
+ MYSQL_FTPARSER_BOOLEAN_INFO struct)
+
+ FT_TOKEN_EOF: End of data.
+ FT_TOKEN_WORD: Regular word.
+ FT_TOKEN_LEFT_PAREN: Left parenthesis (start of group/sub-expression).
+ FT_TOKEN_RIGHT_PAREN: Right parenthesis (end of group/sub-expression).
+ FT_TOKEN_STOPWORD: Stopword.
+*/
+
enum enum_ft_token_type
{
FT_TOKEN_EOF= 0,
@@ -110,8 +124,27 @@ enum enum_ft_token_type
boolean-mode metadata to the MySQL search engine for every word in
the search query. A valid instance of this structure must be filled
in by the plugin parser and passed as an argument in the call to
- mysql_add_word (the function from structure MYSQL_FTPARSER_PARAM)
- when a query is parsed in boolean mode.
+ mysql_add_word (the callback function in the MYSQL_FTPARSER_PARAM
+ structure) when a query is parsed in boolean mode.
+
+ type: The token type. Should be one of the enum_ft_token_type values.
+
+ yesno: Whether the word must be present for a match to occur:
+ >0 Must be present
+ <0 Must not be present
+ 0 Neither; the word is optional but its presence increases the relevance
+ With the default settings of the ft_boolean_syntax system variable,
+ >0 corresponds to the '+' operator, <0 corresponds to the '-' operator,
+ and 0 means neither operator was used.
+
+ weight_adjust: A weighting factor that determines how much a match
+ for the word counts. Can be used to increase or decrease the word's
+ importance.
+
+ wasign: The sign of the weight_adjust value.
+
+ trunc: Corresponds to the '*' operator in the default setting of the
+ ft_boolean_syntax system variable.
*/
typedef struct st_mysql_ftparser_boolean_info
@@ -129,48 +162,63 @@ typedef struct st_mysql_ftparser_boolean_info
/*
An argument of the full-text parser plugin. This structure is
- filled by MySQL server and passed to the parsing function of the
+ filled in by the MySQL server and passed to the parsing function of the
plugin as an in/out parameter.
+
+ mysql_parse: A pointer to the built-in parser implementation of the
+ server. It's set by the server and can be used by the parser plugin
+ to invoke the MySQL default parser. If the plugin's role is to extract
+ textual data from .doc, .pdf or .xml content, it might extract
+ plaintext from the content, and then pass the text to the default
+ MySQL parser to be parsed. When mysql_parse is called, its param
+ argument should be given as the mysql_ftparam value.
+
+ mysql_add_word: A server callback to add a new word. When parsing
+ a document, the server sets this to point at a function that adds
+ the word to the MySQL full-text index. When parsing a search query,
+ this function will add the new word to the list of words to search
+ for. When mysql_add_word is called, its param argument should be
+ given as the mysql_ftparam value. boolean_info can be NULL for all
+ cases except when mode is MYSQL_FTPARSER_FULL_BOOLEAN_INFO.
+
+ ftparser_state: A generic pointer. The plugin can set it to point
+ to information to be used internally for its own purposes.
+
+ mysql_ftparam: This is set by the server. It is passed as the first
+ argument to the mysql_parse or mysql_add_word callback. The plugin
+ should not modify it.
+
+ cs: Information about the character set of the document or query string.
+
+ doc: A pointer to the document or query string to be parsed.
+
+ length: Length of the document or query string, in bytes.
+
+ mode: The parsing mode: with boolean operators, with stopwords, or
+ simple (neither). See the MYSQL_FTPARSER_* constants above.
*/
typedef struct st_mysql_ftparser_param
{
- /*
- A fallback pointer to the built-in parser implementation
- of the server. It's set by the server and can be used
- by the parser plugin to invoke the MySQL default parser.
- If plugin's role is to extract textual data from .doc,
- .pdf or .xml content, it might use the default MySQL parser
- to parse the extracted plaintext string.
- */
int (*mysql_parse)(void *param, byte *doc, uint doc_len);
- /*
- A server callback to add a new word.
- When parsing a document, the server sets this to point at
- a function that adds the word to MySQL full-text index.
- When parsing a search query, this function will
- add the new word to the list of words to search for.
- boolean_info can be NULL for all cases except
- MYSQL_FTPARSER_FULL_BOOLEAN_INFO mode.
- */
int (*mysql_add_word)(void *param, byte *word, uint word_len,
MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info);
- /* A pointer to the parser local state. This is an inout parameter. */
void *ftparser_state;
void *mysql_ftparam;
- /* Character set of the document or the query */
CHARSET_INFO *cs;
- /* A pointer to the document or the query to be parsed */
byte *doc;
- /* Document/query length */
uint length;
- /*
- Parsing mode: with boolean operators, with stopwords, or nothing.
- See MYSQL_FTPARSER_* constants above.
- */
int mode;
} MYSQL_FTPARSER_PARAM;
+/*
+ Full-text parser descriptor.
+
+ interface_version is, e.g., MYSQL_FTPARSER_INTERFACE_VERSION.
+ The parsing, initialization, and deinitialization functions are
+ invoked per SQL statement for which the parser is used.
+*/
+
struct st_mysql_ftparser
{
int interface_version;