diff options
Diffstat (limited to 'storage/xtradb/fts/fts0ast.cc')
-rw-r--r-- | storage/xtradb/fts/fts0ast.cc | 744 |
1 files changed, 0 insertions, 744 deletions
diff --git a/storage/xtradb/fts/fts0ast.cc b/storage/xtradb/fts/fts0ast.cc deleted file mode 100644 index 030b972440f..00000000000 --- a/storage/xtradb/fts/fts0ast.cc +++ /dev/null @@ -1,744 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file fts/fts0ast.cc -Full Text Search parser helper file. - -Created 2007/3/16 Sunny Bains. -***********************************************************************/ - -#include "mem0mem.h" -#include "fts0ast.h" -#include "fts0pars.h" -#include "fts0fts.h" - -/* The FTS ast visit pass. */ -enum fts_ast_visit_pass_t { - FTS_PASS_FIRST, /*!< First visit pass, - process operators excluding - FTS_EXIST and FTS_IGNORE */ - FTS_PASS_EXIST, /*!< Exist visit pass, - process operator FTS_EXIST */ - FTS_PASS_IGNORE /*!< Ignore visit pass, - process operator FTS_IGNORE */ -}; - -/******************************************************************//** -Create an empty fts_ast_node_t. -@return Create a new node */ -static -fts_ast_node_t* -fts_ast_node_create(void) -/*=====================*/ -{ - fts_ast_node_t* node; - - node = (fts_ast_node_t*) ut_malloc(sizeof(*node)); - memset(node, 0x0, sizeof(*node)); - - return(node); -} - -/******************************************************************//** -Create a operator fts_ast_node_t. -@return new node */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_create_node_oper( -/*=====================*/ - void* arg, /*!< in: ast state instance */ - fts_ast_oper_t oper) /*!< in: ast operator */ -{ - fts_ast_node_t* node = fts_ast_node_create(); - - node->type = FTS_AST_OPER; - node->oper = oper; - - fts_ast_state_add_node((fts_ast_state_t*) arg, node); - - return(node); -} - -/******************************************************************//** -This function takes ownership of the ptr and is responsible -for free'ing it -@return new node or a node list with tokenized words */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_create_node_term( -/*=====================*/ - void* arg, /*!< in: ast state instance */ - const fts_ast_string_t* ptr) /*!< in: ast term string */ -{ - fts_ast_state_t* state = static_cast<fts_ast_state_t*>(arg); - ulint len = ptr->len; - ulint cur_pos = 0; - fts_ast_node_t* node = NULL; - fts_ast_node_t* node_list = NULL; - fts_ast_node_t* first_node = NULL; - - /* Scan the incoming string and filter out any "non-word" characters */ - while (cur_pos < len) { - fts_string_t str; - ulint offset; - ulint cur_len; - - cur_len = innobase_mysql_fts_get_token( - state->charset, - reinterpret_cast<const byte*>(ptr->str) + cur_pos, - reinterpret_cast<const byte*>(ptr->str) + len, - &str, &offset); - - if (cur_len == 0) { - break; - } - - cur_pos += cur_len; - - if (str.f_n_char > 0) { - /* If the subsequent term (after the first one)'s size - is less than fts_min_token_size or the term is greater - than fts_max_token_size, we shall ignore that. This is - to make consistent with MyISAM behavior */ - if ((first_node && (str.f_n_char < fts_min_token_size)) - || str.f_n_char > fts_max_token_size) { - continue; - } - - node = fts_ast_node_create(); - - node->type = FTS_AST_TERM; - - node->term.ptr = fts_ast_string_create( - str.f_str, str.f_len); - - fts_ast_state_add_node( - static_cast<fts_ast_state_t*>(arg), node); - - if (first_node) { - /* There is more than one word, create - a list to organize them */ - if (!node_list) { - node_list = fts_ast_create_node_list( - static_cast<fts_ast_state_t*>( - arg), - first_node); - } - - fts_ast_add_node(node_list, node); - } else { - first_node = node; - } - } - } - - return((node_list != NULL) ? node_list : first_node); -} - -/******************************************************************//** -This function takes ownership of the ptr and is responsible -for free'ing it. -@return new node */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_create_node_text( -/*=====================*/ - void* arg, /*!< in: ast state instance */ - const fts_ast_string_t* ptr) /*!< in: ast text string */ -{ - ulint len = ptr->len; - fts_ast_node_t* node = NULL; - - /* Once we come here, the string must have at least 2 quotes "" - around the query string, which could be empty. Also the query - string may contain 0x00 in it, we don't treat it as null-terminated. */ - ut_ad(len >= 2); - ut_ad(ptr->str[0] == '\"' && ptr->str[len - 1] == '\"'); - - if (len == 2) { - /* If the query string contains nothing except quotes, - it's obviously an invalid query. */ - return(NULL); - } - - node = fts_ast_node_create(); - - /*!< We ignore the actual quotes "" */ - len -= 2; - - node->type = FTS_AST_TEXT; - /*!< Skip copying the first quote */ - node->text.ptr = fts_ast_string_create( - reinterpret_cast<const byte*>(ptr->str + 1), len); - node->text.distance = ULINT_UNDEFINED; - - fts_ast_state_add_node((fts_ast_state_t*) arg, node); - - return(node); -} - -/******************************************************************//** -This function takes ownership of the expr and is responsible -for free'ing it. -@return new node */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_create_node_list( -/*=====================*/ - void* arg, /*!< in: ast state instance */ - fts_ast_node_t* expr) /*!< in: ast expr instance */ -{ - fts_ast_node_t* node = fts_ast_node_create(); - - node->type = FTS_AST_LIST; - node->list.head = node->list.tail = expr; - - fts_ast_state_add_node((fts_ast_state_t*) arg, node); - - return(node); -} - -/******************************************************************//** -Create a sub-expression list node. This function takes ownership of -expr and is responsible for deleting it. -@return new node */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_create_node_subexp_list( -/*============================*/ - void* arg, /*!< in: ast state instance */ - fts_ast_node_t* expr) /*!< in: ast expr instance */ -{ - fts_ast_node_t* node = fts_ast_node_create(); - - node->type = FTS_AST_SUBEXP_LIST; - node->list.head = node->list.tail = expr; - - fts_ast_state_add_node((fts_ast_state_t*) arg, node); - - return(node); -} - -/******************************************************************//** -Free an expr list node elements. */ -static -void -fts_ast_free_list( -/*==============*/ - fts_ast_node_t* node) /*!< in: ast node to free */ -{ - ut_a(node->type == FTS_AST_LIST - || node->type == FTS_AST_SUBEXP_LIST); - - for (node = node->list.head; - node != NULL; - node = fts_ast_free_node(node)) { - - /*!< No op */ - } -} - -/********************************************************************//** -Free a fts_ast_node_t instance. -@return next node to free */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_free_node( -/*==============*/ - fts_ast_node_t* node) /*!< in: the node to free */ -{ - fts_ast_node_t* next_node; - - switch (node->type) { - case FTS_AST_TEXT: - if (node->text.ptr) { - fts_ast_string_free(node->text.ptr); - node->text.ptr = NULL; - } - break; - - case FTS_AST_TERM: - if (node->term.ptr) { - fts_ast_string_free(node->term.ptr); - node->term.ptr = NULL; - } - break; - - case FTS_AST_LIST: - case FTS_AST_SUBEXP_LIST: - fts_ast_free_list(node); - node->list.head = node->list.tail = NULL; - break; - - case FTS_AST_OPER: - break; - - default: - ut_error; - } - - /*!< Get next node before freeing the node itself */ - next_node = node->next; - - ut_free(node); - - return(next_node); -} - -/******************************************************************//** -This AST takes ownership of the expr and is responsible -for free'ing it. -@return in param "list" */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_add_node( -/*=============*/ - fts_ast_node_t* node, /*!< in: list instance */ - fts_ast_node_t* elem) /*!< in: node to add to list */ -{ - if (!elem) { - return(NULL); - } - - ut_a(!elem->next); - ut_a(node->type == FTS_AST_LIST - || node->type == FTS_AST_SUBEXP_LIST); - - if (!node->list.head) { - ut_a(!node->list.tail); - - node->list.head = node->list.tail = elem; - } else { - ut_a(node->list.tail); - - node->list.tail->next = elem; - node->list.tail = elem; - } - - return(node); -} - -/******************************************************************//** -For tracking node allocations, in case there is an error during -parsing. */ -UNIV_INTERN -void -fts_ast_state_add_node( -/*===================*/ - fts_ast_state_t*state, /*!< in: ast instance */ - fts_ast_node_t* node) /*!< in: node to add to ast */ -{ - if (!state->list.head) { - ut_a(!state->list.tail); - - state->list.head = state->list.tail = node; - } else { - state->list.tail->next_alloc = node; - state->list.tail = node; - } -} - -/******************************************************************//** -Set the wildcard attribute of a term. */ -UNIV_INTERN -void -fts_ast_term_set_wildcard( -/*======================*/ - fts_ast_node_t* node) /*!< in/out: set attribute of - a term node */ -{ - if (!node) { - return; - } - - /* If it's a node list, the wildcard should be set to the tail node*/ - if (node->type == FTS_AST_LIST) { - ut_ad(node->list.tail != NULL); - node = node->list.tail; - } - - ut_a(node->type == FTS_AST_TERM); - ut_a(!node->term.wildcard); - - node->term.wildcard = TRUE; -} - -/******************************************************************//** -Set the proximity attribute of a text node. */ -UNIV_INTERN -void -fts_ast_term_set_distance( -/*======================*/ - fts_ast_node_t* node, /*!< in/out: text node */ - ulint distance) /*!< in: the text proximity - distance */ -{ - if (node == NULL) { - return; - } - - ut_a(node->type == FTS_AST_TEXT); - ut_a(node->text.distance == ULINT_UNDEFINED); - - node->text.distance = distance; -} - -/******************************************************************//** -Free node and expr allocations. */ -UNIV_INTERN -void -fts_ast_state_free( -/*===============*/ - fts_ast_state_t*state) /*!< in: ast state to free */ -{ - fts_ast_node_t* node = state->list.head; - - /* Free the nodes that were allocated during parsing. */ - while (node) { - fts_ast_node_t* next = node->next_alloc; - - if (node->type == FTS_AST_TEXT && node->text.ptr) { - fts_ast_string_free(node->text.ptr); - node->text.ptr = NULL; - } else if (node->type == FTS_AST_TERM && node->term.ptr) { - fts_ast_string_free(node->term.ptr); - node->term.ptr = NULL; - } - - ut_free(node); - node = next; - } - - state->root = state->list.head = state->list.tail = NULL; -} - -/******************************************************************//** -Print an ast node. */ -UNIV_INTERN -void -fts_ast_node_print( -/*===============*/ - fts_ast_node_t* node) /*!< in: ast node to print */ -{ - switch (node->type) { - case FTS_AST_TEXT: - printf("TEXT: "); - fts_ast_string_print(node->text.ptr); - break; - - case FTS_AST_TERM: - printf("TERM: "); - fts_ast_string_print(node->term.ptr); - break; - - case FTS_AST_LIST: - printf("LIST: "); - node = node->list.head; - - while (node) { - fts_ast_node_print(node); - node = node->next; - } - break; - - case FTS_AST_SUBEXP_LIST: - printf("SUBEXP_LIST: "); - node = node->list.head; - - while (node) { - fts_ast_node_print(node); - node = node->next; - } - case FTS_AST_OPER: - printf("OPER: %d\n", node->oper); - break; - - default: - ut_error; - } -} - -/******************************************************************//** -Traverse the AST - in-order traversal, except for the FTX_EXIST and FTS_IGNORE -nodes, which will be ignored in the first pass of each level, and visited in a -second and third pass after all other nodes in the same level are visited. -@return DB_SUCCESS if all went well */ -UNIV_INTERN -dberr_t -fts_ast_visit( -/*==========*/ - fts_ast_oper_t oper, /*!< in: current operator */ - fts_ast_node_t* node, /*!< in: current root node */ - fts_ast_callback visitor, /*!< in: callback function */ - void* arg, /*!< in: arg for callback */ - bool* has_ignore) /*!< out: true, if the operator - was ignored during processing, - currently we ignore FTS_EXIST - and FTS_IGNORE operators */ -{ - dberr_t error = DB_SUCCESS; - fts_ast_node_t* oper_node = NULL; - fts_ast_node_t* start_node; - bool revisit = false; - bool will_be_ignored = false; - fts_ast_visit_pass_t visit_pass = FTS_PASS_FIRST; - - start_node = node->list.head; - - ut_a(node->type == FTS_AST_LIST - || node->type == FTS_AST_SUBEXP_LIST); - - if (oper == FTS_EXIST_SKIP) { - visit_pass = FTS_PASS_EXIST; - } else if (oper == FTS_IGNORE_SKIP) { - visit_pass = FTS_PASS_IGNORE; - } - - /* In the first pass of the tree, at the leaf level of the - tree, FTS_EXIST and FTS_IGNORE operation will be ignored. - It will be repeated at the level above the leaf level. - - The basic idea here is that when we encounter FTS_EXIST or - FTS_IGNORE, we will change the operator node into FTS_EXIST_SKIP - or FTS_IGNORE_SKIP, and term node & text node with the operators - is ignored in the first pass. We have two passes during the revisit: - We process nodes with FTS_EXIST_SKIP in the exist pass, and then - process nodes with FTS_IGNORE_SKIP in the ignore pass. - - The order should be restrictly followed, or we will get wrong results. - For example, we have a query 'a +b -c d +e -f'. - first pass: process 'a' and 'd' by union; - exist pass: process '+b' and '+e' by intersection; - ignore pass: process '-c' and '-f' by difference. */ - - for (node = node->list.head; - node && (error == DB_SUCCESS); - node = node->next) { - - switch(node->type) { - case FTS_AST_LIST: - if (visit_pass != FTS_PASS_FIRST) { - break; - } - - error = fts_ast_visit(oper, node, visitor, - arg, &will_be_ignored); - - /* If will_be_ignored is set to true, then - we encountered and ignored a FTS_EXIST or FTS_IGNORE - operator. */ - if (will_be_ignored) { - revisit = true; - /* Remember oper for list in case '-abc&def', - ignored oper is from previous node of list.*/ - node->oper = oper; - } - - break; - - case FTS_AST_OPER: - oper = node->oper; - oper_node = node; - - /* Change the operator for revisit */ - if (oper == FTS_EXIST) { - oper_node->oper = FTS_EXIST_SKIP; - } else if (oper == FTS_IGNORE) { - oper_node->oper = FTS_IGNORE_SKIP; - } - - break; - - default: - if (node->visited) { - continue; - } - - ut_a(oper == FTS_NONE || !oper_node - || oper_node->oper == oper - || oper_node->oper == FTS_EXIST_SKIP - || oper_node->oper == FTS_IGNORE_SKIP); - - if (oper== FTS_EXIST || oper == FTS_IGNORE) { - *has_ignore = true; - continue; - } - - /* Process leaf node accroding to its pass.*/ - if (oper == FTS_EXIST_SKIP - && visit_pass == FTS_PASS_EXIST) { - error = visitor(FTS_EXIST, node, arg); - node->visited = true; - } else if (oper == FTS_IGNORE_SKIP - && visit_pass == FTS_PASS_IGNORE) { - error = visitor(FTS_IGNORE, node, arg); - node->visited = true; - } else if (visit_pass == FTS_PASS_FIRST) { - error = visitor(oper, node, arg); - node->visited = true; - } - } - } - - if (revisit) { - /* Exist pass processes the skipped FTS_EXIST operation. */ - for (node = start_node; - node && error == DB_SUCCESS; - node = node->next) { - - if (node->type == FTS_AST_LIST - && node->oper != FTS_IGNORE) { - error = fts_ast_visit(FTS_EXIST_SKIP, node, - visitor, arg, &will_be_ignored); - } - } - - /* Ignore pass processes the skipped FTS_IGNORE operation. */ - for (node = start_node; - node && error == DB_SUCCESS; - node = node->next) { - - if (node->type == FTS_AST_LIST) { - error = fts_ast_visit(FTS_IGNORE_SKIP, node, - visitor, arg, &will_be_ignored); - } - } - } - - return(error); -} - -/** -Create an ast string object, with NUL-terminator, so the string -has one more byte than len -@param[in] str pointer to string -@param[in] len length of the string -@return ast string with NUL-terminator */ -UNIV_INTERN -fts_ast_string_t* -fts_ast_string_create( - const byte* str, - ulint len) -{ - fts_ast_string_t* ast_str; - - ut_ad(len > 0); - - ast_str = static_cast<fts_ast_string_t*> - (ut_malloc(sizeof(fts_ast_string_t))); - ast_str->str = static_cast<byte*>(ut_malloc(len + 1)); - - ast_str->len = len; - memcpy(ast_str->str, str, len); - ast_str->str[len] = '\0'; - - return(ast_str); -} - -/** -Free an ast string instance -@param[in,out] ast_str string to free */ -UNIV_INTERN -void -fts_ast_string_free( - fts_ast_string_t* ast_str) -{ - if (ast_str != NULL) { - ut_free(ast_str->str); - ut_free(ast_str); - } -} - -/** -Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul -@param[in] str string to translate -@param[in] base the base -@return translated number */ -UNIV_INTERN -ulint -fts_ast_string_to_ul( - const fts_ast_string_t* ast_str, - int base) -{ - return(strtoul(reinterpret_cast<const char*>(ast_str->str), - NULL, base)); -} - -/** -Print the ast string -@param[in] str string to print */ -UNIV_INTERN -void -fts_ast_string_print( - const fts_ast_string_t* ast_str) -{ - for (ulint i = 0; i < ast_str->len; ++i) { - printf("%c", ast_str->str[i]); - } - - printf("\n"); -} - -#ifdef UNIV_DEBUG -const char* -fts_ast_oper_name_get(fts_ast_oper_t oper) -{ - switch(oper) { - case FTS_NONE: - return("FTS_NONE"); - case FTS_IGNORE: - return("FTS_IGNORE"); - case FTS_EXIST: - return("FTS_EXIST"); - case FTS_NEGATE: - return("FTS_NEGATE"); - case FTS_INCR_RATING: - return("FTS_INCR_RATING"); - case FTS_DECR_RATING: - return("FTS_DECR_RATING"); - case FTS_DISTANCE: - return("FTS_DISTANCE"); - case FTS_IGNORE_SKIP: - return("FTS_IGNORE_SKIP"); - case FTS_EXIST_SKIP: - return("FTS_EXIST_SKIP"); - } - ut_ad(0); -} - -const char* -fts_ast_node_type_get(fts_ast_type_t type) -{ - switch (type) { - case FTS_AST_OPER: - return("FTS_AST_OPER"); - case FTS_AST_NUMB: - return("FTS_AST_NUMB"); - case FTS_AST_TERM: - return("FTS_AST_TERM"); - case FTS_AST_TEXT: - return("FTS_AST_TEXT"); - case FTS_AST_LIST: - return("FTS_AST_LIST"); - case FTS_AST_SUBEXP_LIST: - return("FTS_AST_SUBEXP_LIST"); - } - ut_ad(0); -} -#endif /* UNIV_DEBUG */ |