diff options
author | unknown <svoj@mysql.com> | 2005-12-28 16:05:30 +0400 |
---|---|---|
committer | unknown <svoj@mysql.com> | 2005-12-28 16:05:30 +0400 |
commit | 38005eae6ac73bb2a58287b6eb9da14816e8424b (patch) | |
tree | 83cc0fcb79bcf2f4024c799124de23490f67d89d /include | |
parent | 5bfbfb24e5e5467bea919ddf5bf0406308e01a15 (diff) | |
download | mariadb-git-38005eae6ac73bb2a58287b6eb9da14816e8424b.tar.gz |
WL#2575 - Fulltext: Parser plugin for FTS
Manual merge.
Makefile.am:
Added new 'plugin' subdir.
configure.in:
Added plugin related makefiles.
include/my_base.h:
Added HA_OPEN_FROM_SQL_LAYER flag - indicates that a table was opened from the sql layer.
Added HA_OPTION_RELIES_ON_SQL_LAYER flag - indicates that a table relies on the sql layer.
Added HA_CREATE_RELIES_ON_SQL_LAYER flag - indicates that a table must be created with
HA_OPTION_RELIES_ON_SQL_LAYER flag.
include/myisam.h:
Distinct fulltext parser number added.
include/plugin.h:
Revise comment.
sql/ha_myisam.cc:
Pass HA_OPEN_FROM_SQL_LAYER flag to mi_open().
Pass HA_CREATE_RELIES_ON_SQL_LAYER flag to mi_create().
sql/sql_plugin.cc:
Reuse "unused" dynamic array elements.
A check for plugin info interface version.
sql/sql_plugin.h:
Added plugin_type_names[] - string plugin type names.
sql/sql_show.cc:
Use plugin_type_names array instead of switch to find literal parser name representation.
sql/sql_table.cc:
Fixed that ALTER TABLE ... ADD INDEX loses WITH PARSER info.
storage/myisam/ft_boolean_search.c:
Call fulltext parser init() function, pass MYSQL_FTPARSER_PARAM, returned by
ftparser_call_initializer(), to parser->parse().
storage/myisam/ft_nlq_search.c:
Call fulltext parser init() function, pass MYSQL_FTPARSER_PARAM, returned by
ftparser_call_initializer(), to parser->parse().
storage/myisam/ft_parser.c:
Added two functions:
ftparser_call_initializer() - calls parser->init() function if specified and parser is not yet
initialized. Returns MYSQL_FTPARSER_PARAM *.
ftparser_call_deinitializer() - calls parser->deinit() function if specified and parser was
initialized. Deinitializes all parsers.
ft_parse() now accepts an additional param - MYSQL_FTPARSER_PARAM - and passes it to parser->parse().
storage/myisam/ft_update.c:
Call fulltext parser init() function, pass MYSQL_FTPARSER_PARAM, returned by
ftparser_call_initializer(), to _mi_ft_parse().
_mi_ft_parse() accepts additional param now - MYSQL_FTPARSER_PARAM and passes
it to parser->parse().
storage/myisam/ftdefs.h:
Prototypes for new functions were added. MYSQL_FTPARSER_PARAM was added
to ft_parse and _mi_ft_parse().
storage/myisam/mi_close.c:
Free ftparser_param allocated by ftparser_call_initializer().
storage/myisam/mi_create.c:
If a table relies on the sql layer, set HA_OPTION_RELIES_ON_SQL_LAYER.
storage/myisam/mi_locking.c:
Call deinitializer for each initialized parser.
storage/myisam/mi_open.c:
Set default values for share->ftparser and keydef->ftparser_nr.
If a table is opened from the non-sql layer and HA_OPTION_RELIES_ON_SQL_LAYER is set, raise
HA_ERR_UNSUPPORTED error.
storage/myisam/myisamdef.h:
Added number of distinct parsers to MYISAM_SHARE.
Added ftparser_param to MI_INFO.
plugin/Makefile.am:
New BitKeeper file ``plugin/Makefile.am''
plugin/fulltext/Makefile.am:
New BitKeeper file ``plugin/fulltext/Makefile.am''
plugin/fulltext/plugin_example.c:
New BitKeeper file ``plugin/fulltext/plugin_example.c''
Diffstat (limited to 'include')
-rw-r--r-- | include/my_base.h | 3 | ||||
-rw-r--r-- | include/myisam.h | 1 | ||||
-rw-r--r-- | include/plugin.h | 136 |
3 files changed, 96 insertions, 44 deletions
diff --git a/include/my_base.h b/include/my_base.h index 4e1a573217b..9b53ebffeb4 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -51,6 +51,7 @@ #define HA_OPEN_DELAY_KEY_WRITE 8 /* Don't update index */ #define HA_OPEN_ABORT_IF_CRASHED 16 #define HA_OPEN_FOR_REPAIR 32 /* open even if crashed */ +#define HA_OPEN_FROM_SQL_LAYER 64 /* The following is parameter to ha_rkey() how to use key */ @@ -246,6 +247,7 @@ enum ha_base_keytype { #define HA_OPTION_DELAY_KEY_WRITE 64 #define HA_OPTION_NO_PACK_KEYS 128 /* Reserved for MySQL */ #define HA_OPTION_CREATE_FROM_ENGINE 256 +#define HA_OPTION_RELIES_ON_SQL_LAYER 512 #define HA_OPTION_TEMP_COMPRESS_RECORD ((uint) 16384) /* set by isamchk */ #define HA_OPTION_READ_ONLY_DATA ((uint) 32768) /* Set by isamchk */ @@ -256,6 +258,7 @@ enum ha_base_keytype { #define HA_CREATE_TMP_TABLE 4 #define HA_CREATE_CHECKSUM 8 #define HA_CREATE_DELAY_KEY_WRITE 64 +#define HA_CREATE_RELIES_ON_SQL_LAYER 128 /* The following flags (OR-ed) are passed to handler::info() method. diff --git a/include/myisam.h b/include/myisam.h index 19458e52f61..4d55409b8b3 100644 --- a/include/myisam.h +++ b/include/myisam.h @@ -198,6 +198,7 @@ typedef struct st_mi_keydef /* Key definition with open & info */ uint16 maxlength; /* max length of (packed) key (auto) */ uint16 block_size; /* block_size (auto) */ uint32 version; /* For concurrent read/write */ + uint32 ftparser_nr; /* distinct ftparser number */ HA_KEYSEG *seg,*end; struct st_mysql_ftparser *parser; /* Fulltext [pre]parser */ diff --git a/include/plugin.h b/include/plugin.h index 4486a719d1d..029d7a611fb 100644 --- a/include/plugin.h +++ b/include/plugin.h @@ -66,36 +66,50 @@ struct st_mysql_plugin /* Parsing modes. Set in MYSQL_FTPARSER_PARAM::mode */ /* - The fast and simple mode. Parser is expected to return only those words that - go into the index. Stopwords or too short/long words should not be returned. - 'boolean_info' argument of mysql_add_word() does not have to be set. 
+ Fast and simple mode. This mode is used for indexing, and natural + language queries. - This mode is used for indexing, and natural language queries. + The parser is expected to return only those words that go into the + index. Stopwords or too short/long words should not be returned. The + 'boolean_info' argument of mysql_add_word() does not have to be set. */ #define MYSQL_FTPARSER_SIMPLE_MODE 0 /* - The parser is not allowed to ignore words in this mode. Every word should - be returned, including stopwords and words that are too short or long. - 'boolean_info' argument of mysql_add_word() does not have to be set. + Parse with stopwords mode. This mode is used in boolean searches for + "phrase matching." - This mode is used in boolean searches for "phrase matching." + The parser is not allowed to ignore words in this mode. Every word + should be returned, including stopwords and words that are too short + or long. The 'boolean_info' argument of mysql_add_word() does not + have to be set. */ #define MYSQL_FTPARSER_WITH_STOPWORDS 1 /* - Parse in boolean mode. The parser should provide a valid - MYSQL_FTPARSER_BOOLEAN_INFO structure in the 'boolean_info' argument - to mysql_add_word(). Usually that means that the parser should - recognize boolean operators in the parsing stream and set appropriate - fields in MYSQL_FTPARSER_BOOLEAN_INFO structure accordingly. As - for MYSQL_FTPARSER_WITH_STOPWORDS mode, no word should be ignored. + Parse in boolean mode. This mode is used to parse a boolean query string. + + The parser should provide a valid MYSQL_FTPARSER_BOOLEAN_INFO + structure in the 'boolean_info' argument to mysql_add_word(). + Usually that means that the parser should recognize boolean operators + in the parsing stream and set appropriate fields in + MYSQL_FTPARSER_BOOLEAN_INFO structure accordingly. As for + MYSQL_FTPARSER_WITH_STOPWORDS mode, no word should be ignored. Instead, use FT_TOKEN_STOPWORD for the token type of such a word. 
- - This mode is used to parse a boolean query string. */ #define MYSQL_FTPARSER_FULL_BOOLEAN_INFO 2 +/* + Token types for boolean mode searching (used for the type member of + MYSQL_FTPARSER_BOOLEAN_INFO struct) + + FT_TOKEN_EOF: End of data. + FT_TOKEN_WORD: Regular word. + FT_TOKEN_LEFT_PAREN: Left parenthesis (start of group/sub-expression). + FT_TOKEN_RIGHT_PAREN: Right parenthesis (end of group/sub-expression). + FT_TOKEN_STOPWORD: Stopword. +*/ + enum enum_ft_token_type { FT_TOKEN_EOF= 0, @@ -110,8 +124,27 @@ enum enum_ft_token_type boolean-mode metadata to the MySQL search engine for every word in the search query. A valid instance of this structure must be filled in by the plugin parser and passed as an argument in the call to - mysql_add_word (the function from structure MYSQL_FTPARSER_PARAM) - when a query is parsed in boolean mode. + mysql_add_word (the callback function in the MYSQL_FTPARSER_PARAM + structure) when a query is parsed in boolean mode. + + type: The token type. Should be one of the enum_ft_token_type values. + + yesno: Whether the word must be present for a match to occur: + >0 Must be present + <0 Must not be present + 0 Neither; the word is optional but its presence increases the relevance + With the default settings of the ft_boolean_syntax system variable, + >0 corresponds to the '+' operator, <0 corrresponds to the '-' operator, + and 0 means neither operator was used. + + weight_adjust: A weighting factor that determines how much a match + for the word counts. Can be used to increase or decrease the word's + importance. + + wasign: The sign of the weight_adjust value. + + trunc: Corresponds to the '*' operator in the default setting of the + ft_boolean_syntax system variable. */ typedef struct st_mysql_ftparser_boolean_info @@ -129,48 +162,63 @@ typedef struct st_mysql_ftparser_boolean_info /* An argument of the full-text parser plugin. 
This structure is - filled by MySQL server and passed to the parsing function of the + filled in by MySQL server and passed to the parsing function of the plugin as an in/out parameter. + + mysql_parse: A pointer to the built-in parser implementation of the + server. It's set by the server and can be used by the parser plugin + to invoke the MySQL default parser. If plugin's role is to extract + textual data from .doc, .pdf or .xml content, it might extract + plaintext from the content, and then pass the text to the default + MySQL parser to be parsed. When mysql_parser is called, its param + argument should be given as the mysql_ftparam value. + + mysql_add_word: A server callback to add a new word. When parsing + a document, the server sets this to point at a function that adds + the word to MySQL full-text index. When parsing a search query, + this function will add the new word to the list of words to search + for. When mysql_add_word is called, its param argument should be + given as the mysql_ftparam value. boolean_info can be NULL for all + cases except when mode is MYSQL_FTPARSER_FULL_BOOLEAN_INFO. + + ftparser_state: A generic pointer. The plugin can set it to point + to information to be used internally for its own purposes. + + mysql_ftparam: This is set by the server. It is passed as the first + argument to the mysql_parse or mysql_add_word callback. The plugin + should not modify it. + + cs: Information about the character set of the document or query string. + + doc: A pointer to the document or query string to be parsed. + + length: Length of the document or query string, in bytes. + + mode: The parsing mode. With boolean operators, with stopwords, or + nothing. See MYSQL_FTPARSER_* constants above. */ typedef struct st_mysql_ftparser_param { - /* - A fallback pointer to the built-in parser implementation - of the server. It's set by the server and can be used - by the parser plugin to invoke the MySQL default parser. 
- If plugin's role is to extract textual data from .doc, - .pdf or .xml content, it might use the default MySQL parser - to parse the extracted plaintext string. - */ int (*mysql_parse)(void *param, byte *doc, uint doc_len); - /* - A server callback to add a new word. - When parsing a document, the server sets this to point at - a function that adds the word to MySQL full-text index. - When parsing a search query, this function will - add the new word to the list of words to search for. - boolean_info can be NULL for all cases except - MYSQL_FTPARSER_FULL_BOOLEAN_INFO mode. - */ int (*mysql_add_word)(void *param, byte *word, uint word_len, MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info); - /* A pointer to the parser local state. This is an inout parameter. */ void *ftparser_state; void *mysql_ftparam; - /* Character set of the document or the query */ CHARSET_INFO *cs; - /* A pointer to the document or the query to be parsed */ byte *doc; - /* Document/query length */ uint length; - /* - Parsing mode: with boolean operators, with stopwords, or nothing. - See MYSQL_FTPARSER_* constants above. - */ int mode; } MYSQL_FTPARSER_PARAM; +/* + Full-text parser descriptor. + + interface_version is, e.g., MYSQL_FTPARSER_INTERFACE_VERSION. + The parsing, initialization, and deinitialization functions are + invoked per SQL statement for which the parser is used. +*/ + struct st_mysql_ftparser { int interface_version; |