summaryrefslogtreecommitdiff
path: root/storage/xtradb/fts/fts0ast.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/xtradb/fts/fts0ast.cc')
-rw-r--r--storage/xtradb/fts/fts0ast.cc744
1 files changed, 0 insertions, 744 deletions
diff --git a/storage/xtradb/fts/fts0ast.cc b/storage/xtradb/fts/fts0ast.cc
deleted file mode 100644
index 030b972440f..00000000000
--- a/storage/xtradb/fts/fts0ast.cc
+++ /dev/null
@@ -1,744 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file fts/fts0ast.cc
-Full Text Search parser helper file.
-
-Created 2007/3/16 Sunny Bains.
-***********************************************************************/
-
-#include "mem0mem.h"
-#include "fts0ast.h"
-#include "fts0pars.h"
-#include "fts0fts.h"
-
-/* The FTS ast visit pass. */
-enum fts_ast_visit_pass_t {
- FTS_PASS_FIRST, /*!< First visit pass,
- process operators excluding
- FTS_EXIST and FTS_IGNORE */
- FTS_PASS_EXIST, /*!< Exist visit pass,
- process operator FTS_EXIST */
- FTS_PASS_IGNORE /*!< Ignore visit pass,
- process operator FTS_IGNORE */
-};
-
-/******************************************************************//**
-Create an empty fts_ast_node_t.
-@return Create a new node */
-static
-fts_ast_node_t*
-fts_ast_node_create(void)
-/*=====================*/
-{
- fts_ast_node_t* node;
-
- node = (fts_ast_node_t*) ut_malloc(sizeof(*node));
- memset(node, 0x0, sizeof(*node));
-
- return(node);
-}
-
-/******************************************************************//**
-Create a operator fts_ast_node_t.
-@return new node */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_create_node_oper(
-/*=====================*/
- void* arg, /*!< in: ast state instance */
- fts_ast_oper_t oper) /*!< in: ast operator */
-{
- fts_ast_node_t* node = fts_ast_node_create();
-
- node->type = FTS_AST_OPER;
- node->oper = oper;
-
- fts_ast_state_add_node((fts_ast_state_t*) arg, node);
-
- return(node);
-}
-
-/******************************************************************//**
-This function takes ownership of the ptr and is responsible
-for free'ing it
-@return new node or a node list with tokenized words */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_create_node_term(
-/*=====================*/
- void* arg, /*!< in: ast state instance */
- const fts_ast_string_t* ptr) /*!< in: ast term string */
-{
- fts_ast_state_t* state = static_cast<fts_ast_state_t*>(arg);
- ulint len = ptr->len;
- ulint cur_pos = 0;
- fts_ast_node_t* node = NULL;
- fts_ast_node_t* node_list = NULL;
- fts_ast_node_t* first_node = NULL;
-
- /* Scan the incoming string and filter out any "non-word" characters */
- while (cur_pos < len) {
- fts_string_t str;
- ulint offset;
- ulint cur_len;
-
- cur_len = innobase_mysql_fts_get_token(
- state->charset,
- reinterpret_cast<const byte*>(ptr->str) + cur_pos,
- reinterpret_cast<const byte*>(ptr->str) + len,
- &str, &offset);
-
- if (cur_len == 0) {
- break;
- }
-
- cur_pos += cur_len;
-
- if (str.f_n_char > 0) {
- /* If the subsequent term (after the first one)'s size
- is less than fts_min_token_size or the term is greater
- than fts_max_token_size, we shall ignore that. This is
- to make consistent with MyISAM behavior */
- if ((first_node && (str.f_n_char < fts_min_token_size))
- || str.f_n_char > fts_max_token_size) {
- continue;
- }
-
- node = fts_ast_node_create();
-
- node->type = FTS_AST_TERM;
-
- node->term.ptr = fts_ast_string_create(
- str.f_str, str.f_len);
-
- fts_ast_state_add_node(
- static_cast<fts_ast_state_t*>(arg), node);
-
- if (first_node) {
- /* There is more than one word, create
- a list to organize them */
- if (!node_list) {
- node_list = fts_ast_create_node_list(
- static_cast<fts_ast_state_t*>(
- arg),
- first_node);
- }
-
- fts_ast_add_node(node_list, node);
- } else {
- first_node = node;
- }
- }
- }
-
- return((node_list != NULL) ? node_list : first_node);
-}
-
-/******************************************************************//**
-This function takes ownership of the ptr and is responsible
-for free'ing it.
-@return new node */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_create_node_text(
-/*=====================*/
- void* arg, /*!< in: ast state instance */
- const fts_ast_string_t* ptr) /*!< in: ast text string */
-{
- ulint len = ptr->len;
- fts_ast_node_t* node = NULL;
-
- /* Once we come here, the string must have at least 2 quotes ""
- around the query string, which could be empty. Also the query
- string may contain 0x00 in it, we don't treat it as null-terminated. */
- ut_ad(len >= 2);
- ut_ad(ptr->str[0] == '\"' && ptr->str[len - 1] == '\"');
-
- if (len == 2) {
- /* If the query string contains nothing except quotes,
- it's obviously an invalid query. */
- return(NULL);
- }
-
- node = fts_ast_node_create();
-
- /*!< We ignore the actual quotes "" */
- len -= 2;
-
- node->type = FTS_AST_TEXT;
- /*!< Skip copying the first quote */
- node->text.ptr = fts_ast_string_create(
- reinterpret_cast<const byte*>(ptr->str + 1), len);
- node->text.distance = ULINT_UNDEFINED;
-
- fts_ast_state_add_node((fts_ast_state_t*) arg, node);
-
- return(node);
-}
-
-/******************************************************************//**
-This function takes ownership of the expr and is responsible
-for free'ing it.
-@return new node */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_create_node_list(
-/*=====================*/
- void* arg, /*!< in: ast state instance */
- fts_ast_node_t* expr) /*!< in: ast expr instance */
-{
- fts_ast_node_t* node = fts_ast_node_create();
-
- node->type = FTS_AST_LIST;
- node->list.head = node->list.tail = expr;
-
- fts_ast_state_add_node((fts_ast_state_t*) arg, node);
-
- return(node);
-}
-
-/******************************************************************//**
-Create a sub-expression list node. This function takes ownership of
-expr and is responsible for deleting it.
-@return new node */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_create_node_subexp_list(
-/*============================*/
- void* arg, /*!< in: ast state instance */
- fts_ast_node_t* expr) /*!< in: ast expr instance */
-{
- fts_ast_node_t* node = fts_ast_node_create();
-
- node->type = FTS_AST_SUBEXP_LIST;
- node->list.head = node->list.tail = expr;
-
- fts_ast_state_add_node((fts_ast_state_t*) arg, node);
-
- return(node);
-}
-
-/******************************************************************//**
-Free an expr list node elements. */
-static
-void
-fts_ast_free_list(
-/*==============*/
- fts_ast_node_t* node) /*!< in: ast node to free */
-{
- ut_a(node->type == FTS_AST_LIST
- || node->type == FTS_AST_SUBEXP_LIST);
-
- for (node = node->list.head;
- node != NULL;
- node = fts_ast_free_node(node)) {
-
- /*!< No op */
- }
-}
-
-/********************************************************************//**
-Free a fts_ast_node_t instance.
-@return next node to free */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_free_node(
-/*==============*/
- fts_ast_node_t* node) /*!< in: the node to free */
-{
- fts_ast_node_t* next_node;
-
- switch (node->type) {
- case FTS_AST_TEXT:
- if (node->text.ptr) {
- fts_ast_string_free(node->text.ptr);
- node->text.ptr = NULL;
- }
- break;
-
- case FTS_AST_TERM:
- if (node->term.ptr) {
- fts_ast_string_free(node->term.ptr);
- node->term.ptr = NULL;
- }
- break;
-
- case FTS_AST_LIST:
- case FTS_AST_SUBEXP_LIST:
- fts_ast_free_list(node);
- node->list.head = node->list.tail = NULL;
- break;
-
- case FTS_AST_OPER:
- break;
-
- default:
- ut_error;
- }
-
- /*!< Get next node before freeing the node itself */
- next_node = node->next;
-
- ut_free(node);
-
- return(next_node);
-}
-
-/******************************************************************//**
-This AST takes ownership of the expr and is responsible
-for free'ing it.
-@return in param "list" */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_add_node(
-/*=============*/
- fts_ast_node_t* node, /*!< in: list instance */
- fts_ast_node_t* elem) /*!< in: node to add to list */
-{
- if (!elem) {
- return(NULL);
- }
-
- ut_a(!elem->next);
- ut_a(node->type == FTS_AST_LIST
- || node->type == FTS_AST_SUBEXP_LIST);
-
- if (!node->list.head) {
- ut_a(!node->list.tail);
-
- node->list.head = node->list.tail = elem;
- } else {
- ut_a(node->list.tail);
-
- node->list.tail->next = elem;
- node->list.tail = elem;
- }
-
- return(node);
-}
-
-/******************************************************************//**
-For tracking node allocations, in case there is an error during
-parsing. */
-UNIV_INTERN
-void
-fts_ast_state_add_node(
-/*===================*/
- fts_ast_state_t*state, /*!< in: ast instance */
- fts_ast_node_t* node) /*!< in: node to add to ast */
-{
- if (!state->list.head) {
- ut_a(!state->list.tail);
-
- state->list.head = state->list.tail = node;
- } else {
- state->list.tail->next_alloc = node;
- state->list.tail = node;
- }
-}
-
-/******************************************************************//**
-Set the wildcard attribute of a term. */
-UNIV_INTERN
-void
-fts_ast_term_set_wildcard(
-/*======================*/
- fts_ast_node_t* node) /*!< in/out: set attribute of
- a term node */
-{
- if (!node) {
- return;
- }
-
- /* If it's a node list, the wildcard should be set to the tail node*/
- if (node->type == FTS_AST_LIST) {
- ut_ad(node->list.tail != NULL);
- node = node->list.tail;
- }
-
- ut_a(node->type == FTS_AST_TERM);
- ut_a(!node->term.wildcard);
-
- node->term.wildcard = TRUE;
-}
-
-/******************************************************************//**
-Set the proximity attribute of a text node. */
-UNIV_INTERN
-void
-fts_ast_term_set_distance(
-/*======================*/
- fts_ast_node_t* node, /*!< in/out: text node */
- ulint distance) /*!< in: the text proximity
- distance */
-{
- if (node == NULL) {
- return;
- }
-
- ut_a(node->type == FTS_AST_TEXT);
- ut_a(node->text.distance == ULINT_UNDEFINED);
-
- node->text.distance = distance;
-}
-
-/******************************************************************//**
-Free node and expr allocations. */
-UNIV_INTERN
-void
-fts_ast_state_free(
-/*===============*/
- fts_ast_state_t*state) /*!< in: ast state to free */
-{
- fts_ast_node_t* node = state->list.head;
-
- /* Free the nodes that were allocated during parsing. */
- while (node) {
- fts_ast_node_t* next = node->next_alloc;
-
- if (node->type == FTS_AST_TEXT && node->text.ptr) {
- fts_ast_string_free(node->text.ptr);
- node->text.ptr = NULL;
- } else if (node->type == FTS_AST_TERM && node->term.ptr) {
- fts_ast_string_free(node->term.ptr);
- node->term.ptr = NULL;
- }
-
- ut_free(node);
- node = next;
- }
-
- state->root = state->list.head = state->list.tail = NULL;
-}
-
-/******************************************************************//**
-Print an ast node. */
-UNIV_INTERN
-void
-fts_ast_node_print(
-/*===============*/
- fts_ast_node_t* node) /*!< in: ast node to print */
-{
- switch (node->type) {
- case FTS_AST_TEXT:
- printf("TEXT: ");
- fts_ast_string_print(node->text.ptr);
- break;
-
- case FTS_AST_TERM:
- printf("TERM: ");
- fts_ast_string_print(node->term.ptr);
- break;
-
- case FTS_AST_LIST:
- printf("LIST: ");
- node = node->list.head;
-
- while (node) {
- fts_ast_node_print(node);
- node = node->next;
- }
- break;
-
- case FTS_AST_SUBEXP_LIST:
- printf("SUBEXP_LIST: ");
- node = node->list.head;
-
- while (node) {
- fts_ast_node_print(node);
- node = node->next;
- }
- case FTS_AST_OPER:
- printf("OPER: %d\n", node->oper);
- break;
-
- default:
- ut_error;
- }
-}
-
-/******************************************************************//**
-Traverse the AST - in-order traversal, except for the FTX_EXIST and FTS_IGNORE
-nodes, which will be ignored in the first pass of each level, and visited in a
-second and third pass after all other nodes in the same level are visited.
-@return DB_SUCCESS if all went well */
-UNIV_INTERN
-dberr_t
-fts_ast_visit(
-/*==========*/
- fts_ast_oper_t oper, /*!< in: current operator */
- fts_ast_node_t* node, /*!< in: current root node */
- fts_ast_callback visitor, /*!< in: callback function */
- void* arg, /*!< in: arg for callback */
- bool* has_ignore) /*!< out: true, if the operator
- was ignored during processing,
- currently we ignore FTS_EXIST
- and FTS_IGNORE operators */
-{
- dberr_t error = DB_SUCCESS;
- fts_ast_node_t* oper_node = NULL;
- fts_ast_node_t* start_node;
- bool revisit = false;
- bool will_be_ignored = false;
- fts_ast_visit_pass_t visit_pass = FTS_PASS_FIRST;
-
- start_node = node->list.head;
-
- ut_a(node->type == FTS_AST_LIST
- || node->type == FTS_AST_SUBEXP_LIST);
-
- if (oper == FTS_EXIST_SKIP) {
- visit_pass = FTS_PASS_EXIST;
- } else if (oper == FTS_IGNORE_SKIP) {
- visit_pass = FTS_PASS_IGNORE;
- }
-
- /* In the first pass of the tree, at the leaf level of the
- tree, FTS_EXIST and FTS_IGNORE operation will be ignored.
- It will be repeated at the level above the leaf level.
-
- The basic idea here is that when we encounter FTS_EXIST or
- FTS_IGNORE, we will change the operator node into FTS_EXIST_SKIP
- or FTS_IGNORE_SKIP, and term node & text node with the operators
- is ignored in the first pass. We have two passes during the revisit:
- We process nodes with FTS_EXIST_SKIP in the exist pass, and then
- process nodes with FTS_IGNORE_SKIP in the ignore pass.
-
- The order should be restrictly followed, or we will get wrong results.
- For example, we have a query 'a +b -c d +e -f'.
- first pass: process 'a' and 'd' by union;
- exist pass: process '+b' and '+e' by intersection;
- ignore pass: process '-c' and '-f' by difference. */
-
- for (node = node->list.head;
- node && (error == DB_SUCCESS);
- node = node->next) {
-
- switch(node->type) {
- case FTS_AST_LIST:
- if (visit_pass != FTS_PASS_FIRST) {
- break;
- }
-
- error = fts_ast_visit(oper, node, visitor,
- arg, &will_be_ignored);
-
- /* If will_be_ignored is set to true, then
- we encountered and ignored a FTS_EXIST or FTS_IGNORE
- operator. */
- if (will_be_ignored) {
- revisit = true;
- /* Remember oper for list in case '-abc&def',
- ignored oper is from previous node of list.*/
- node->oper = oper;
- }
-
- break;
-
- case FTS_AST_OPER:
- oper = node->oper;
- oper_node = node;
-
- /* Change the operator for revisit */
- if (oper == FTS_EXIST) {
- oper_node->oper = FTS_EXIST_SKIP;
- } else if (oper == FTS_IGNORE) {
- oper_node->oper = FTS_IGNORE_SKIP;
- }
-
- break;
-
- default:
- if (node->visited) {
- continue;
- }
-
- ut_a(oper == FTS_NONE || !oper_node
- || oper_node->oper == oper
- || oper_node->oper == FTS_EXIST_SKIP
- || oper_node->oper == FTS_IGNORE_SKIP);
-
- if (oper== FTS_EXIST || oper == FTS_IGNORE) {
- *has_ignore = true;
- continue;
- }
-
- /* Process leaf node accroding to its pass.*/
- if (oper == FTS_EXIST_SKIP
- && visit_pass == FTS_PASS_EXIST) {
- error = visitor(FTS_EXIST, node, arg);
- node->visited = true;
- } else if (oper == FTS_IGNORE_SKIP
- && visit_pass == FTS_PASS_IGNORE) {
- error = visitor(FTS_IGNORE, node, arg);
- node->visited = true;
- } else if (visit_pass == FTS_PASS_FIRST) {
- error = visitor(oper, node, arg);
- node->visited = true;
- }
- }
- }
-
- if (revisit) {
- /* Exist pass processes the skipped FTS_EXIST operation. */
- for (node = start_node;
- node && error == DB_SUCCESS;
- node = node->next) {
-
- if (node->type == FTS_AST_LIST
- && node->oper != FTS_IGNORE) {
- error = fts_ast_visit(FTS_EXIST_SKIP, node,
- visitor, arg, &will_be_ignored);
- }
- }
-
- /* Ignore pass processes the skipped FTS_IGNORE operation. */
- for (node = start_node;
- node && error == DB_SUCCESS;
- node = node->next) {
-
- if (node->type == FTS_AST_LIST) {
- error = fts_ast_visit(FTS_IGNORE_SKIP, node,
- visitor, arg, &will_be_ignored);
- }
- }
- }
-
- return(error);
-}
-
-/**
-Create an ast string object, with NUL-terminator, so the string
-has one more byte than len
-@param[in] str pointer to string
-@param[in] len length of the string
-@return ast string with NUL-terminator */
-UNIV_INTERN
-fts_ast_string_t*
-fts_ast_string_create(
- const byte* str,
- ulint len)
-{
- fts_ast_string_t* ast_str;
-
- ut_ad(len > 0);
-
- ast_str = static_cast<fts_ast_string_t*>
- (ut_malloc(sizeof(fts_ast_string_t)));
- ast_str->str = static_cast<byte*>(ut_malloc(len + 1));
-
- ast_str->len = len;
- memcpy(ast_str->str, str, len);
- ast_str->str[len] = '\0';
-
- return(ast_str);
-}
-
-/**
-Free an ast string instance
-@param[in,out] ast_str string to free */
-UNIV_INTERN
-void
-fts_ast_string_free(
- fts_ast_string_t* ast_str)
-{
- if (ast_str != NULL) {
- ut_free(ast_str->str);
- ut_free(ast_str);
- }
-}
-
-/**
-Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
-@param[in] str string to translate
-@param[in] base the base
-@return translated number */
-UNIV_INTERN
-ulint
-fts_ast_string_to_ul(
- const fts_ast_string_t* ast_str,
- int base)
-{
- return(strtoul(reinterpret_cast<const char*>(ast_str->str),
- NULL, base));
-}
-
-/**
-Print the ast string
-@param[in] str string to print */
-UNIV_INTERN
-void
-fts_ast_string_print(
- const fts_ast_string_t* ast_str)
-{
- for (ulint i = 0; i < ast_str->len; ++i) {
- printf("%c", ast_str->str[i]);
- }
-
- printf("\n");
-}
-
-#ifdef UNIV_DEBUG
-const char*
-fts_ast_oper_name_get(fts_ast_oper_t oper)
-{
- switch(oper) {
- case FTS_NONE:
- return("FTS_NONE");
- case FTS_IGNORE:
- return("FTS_IGNORE");
- case FTS_EXIST:
- return("FTS_EXIST");
- case FTS_NEGATE:
- return("FTS_NEGATE");
- case FTS_INCR_RATING:
- return("FTS_INCR_RATING");
- case FTS_DECR_RATING:
- return("FTS_DECR_RATING");
- case FTS_DISTANCE:
- return("FTS_DISTANCE");
- case FTS_IGNORE_SKIP:
- return("FTS_IGNORE_SKIP");
- case FTS_EXIST_SKIP:
- return("FTS_EXIST_SKIP");
- }
- ut_ad(0);
-}
-
-const char*
-fts_ast_node_type_get(fts_ast_type_t type)
-{
- switch (type) {
- case FTS_AST_OPER:
- return("FTS_AST_OPER");
- case FTS_AST_NUMB:
- return("FTS_AST_NUMB");
- case FTS_AST_TERM:
- return("FTS_AST_TERM");
- case FTS_AST_TEXT:
- return("FTS_AST_TEXT");
- case FTS_AST_LIST:
- return("FTS_AST_LIST");
- case FTS_AST_SUBEXP_LIST:
- return("FTS_AST_SUBEXP_LIST");
- }
- ut_ad(0);
-}
-#endif /* UNIV_DEBUG */