/***************************************************************************** Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2016, 2020, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA *****************************************************************************/ /******************************************************************//** @file include/fts0ast.h The FTS query parser (AST) abstract syntax tree routines Created 2007/03/16/03 Sunny Bains *******************************************************/ #ifndef INNOBASE_FST0AST_H #define INNOBASE_FST0AST_H #include "mem0mem.h" /* The type of AST Node */ enum fts_ast_type_t { FTS_AST_OPER, /*!< Operator */ FTS_AST_NUMB, /*!< Number */ FTS_AST_TERM, /*!< Term (or word) */ FTS_AST_TEXT, /*!< Text string */ FTS_AST_PARSER_PHRASE_LIST, /*!< Phase for plugin parser The difference from text type is that we tokenize text into term list */ FTS_AST_LIST, /*!< Expression list */ FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */ }; /* The FTS query operators that we support */ enum fts_ast_oper_t { FTS_NONE, /*!< No operator */ FTS_IGNORE, /*!< Ignore rows that contain this word */ FTS_EXIST, /*!< Include rows that contain this word */ FTS_NEGATE, /*!< Include rows that contain this word but rank them lower*/ FTS_INCR_RATING, /*!< Increase the rank for this word*/ FTS_DECR_RATING, /*!< Decrease the rank for this word*/ FTS_DISTANCE, /*!< Proximity distance */ FTS_IGNORE_SKIP, /*!< Transient node operator signifies that this is a FTS_IGNORE node, and ignored in the first pass of fts_ast_visit() */ FTS_EXIST_SKIP /*!< Transient node operator signifies that this ia a FTS_EXIST node, and ignored in the first pass of fts_ast_visit() */ }; /* Data types used by the FTS parser */ struct fts_lexer_t; struct fts_ast_node_t; struct fts_ast_state_t; struct fts_ast_string_t; typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*); /******************************************************************** Parse the string using the lexer setup within state.*/ int fts_parse( /*======*/ /* out: 0 on OK, 1 on error */ fts_ast_state_t* state); /*!< in: ast state instance.*/ /******************************************************************** Create an AST operator node */ extern fts_ast_node_t* fts_ast_create_node_oper( /*=====================*/ void* arg, /*!< in: ast state */ fts_ast_oper_t oper); /*!< in: ast operator */ /******************************************************************** Create an AST term node, makes a copy of ptr */ extern fts_ast_node_t* fts_ast_create_node_term( /*=====================*/ void* arg, /*!< in: ast state */ const fts_ast_string_t* ptr); /*!< in: term string */ /******************************************************************** Create an AST text node */ extern fts_ast_node_t* fts_ast_create_node_text( /*=====================*/ void* arg, /*!< in: ast state */ const fts_ast_string_t* ptr); /*!< in: text string */ /******************************************************************** Create an AST expr list node */ extern fts_ast_node_t* fts_ast_create_node_list( /*=====================*/ void* arg, /*!< in: ast state */ fts_ast_node_t* expr); /*!< in: ast expr */ /******************************************************************** Create a sub-expression list node. This function takes ownership of expr and is responsible for deleting it. */ extern fts_ast_node_t* fts_ast_create_node_subexp_list( /*============================*/ /* out: new node */ void* arg, /*!< in: ast state instance */ fts_ast_node_t* expr); /*!< in: ast expr instance */ /******************************************************************** Set the wildcard attribute of a term.*/ extern void fts_ast_term_set_wildcard( /*======================*/ fts_ast_node_t* node); /*!< in: term to change */ /******************************************************************** Set the proximity attribute of a text node. */ void fts_ast_text_set_distance( /*======================*/ fts_ast_node_t* node, /*!< in/out: text node */ ulint distance); /*!< in: the text proximity distance */ /********************************************************************//** Free a fts_ast_node_t instance. @return next node to free */ fts_ast_node_t* fts_ast_free_node( /*==============*/ fts_ast_node_t* node); /*!< in: node to free */ /******************************************************************** Add a sub-expression to an AST*/ extern fts_ast_node_t* fts_ast_add_node( /*=============*/ fts_ast_node_t* list, /*!< in: list node instance */ fts_ast_node_t* node); /*!< in: (sub) expr to add */ /******************************************************************** Print the AST node recursively.*/ extern void fts_ast_node_print( /*===============*/ fts_ast_node_t* node); /*!< in: ast node to print */ /******************************************************************** Free node and expr allocations.*/ extern void fts_ast_state_free( /*===============*/ fts_ast_state_t*state); /*!< in: state instance to free */ /** Check only union operation involved in the node @param[in] node ast node to check @return true if the node contains only union else false. */ bool fts_ast_node_check_union( fts_ast_node_t* node); /******************************************************************//** Traverse the AST - in-order traversal. @return DB_SUCCESS if all went well */ dberr_t fts_ast_visit( /*==========*/ fts_ast_oper_t oper, /*!< in: FTS operator */ fts_ast_node_t* node, /*!< in: instance to traverse*/ fts_ast_callback visitor, /*!< in: callback */ void* arg, /*!< in: callback arg */ bool* has_ignore) /*!< out: whether we encounter and ignored processing an operator, currently we only ignore FTS_IGNORE operator */ MY_ATTRIBUTE((nonnull, warn_unused_result)); /******************************************************************** Create a lex instance.*/ fts_lexer_t* fts_lexer_create( /*=============*/ ibool boolean_mode, /*!< in: query type */ const byte* query, /*!< in: query string */ ulint query_len) /*!< in: query string len */ MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); /******************************************************************** Free an fts_lexer_t instance.*/ void fts_lexer_free( /*===========*/ fts_lexer_t* fts_lexer) /*!< in: lexer instance to free */ MY_ATTRIBUTE((nonnull)); /** Create an ast string object, with NUL-terminator, so the string has one more byte than len @param[in] str pointer to string @param[in] len length of the string @return ast string with NUL-terminator */ fts_ast_string_t* fts_ast_string_create( const byte* str, ulint len); /** Free an ast string instance @param[in,out] ast_str string to free */ void fts_ast_string_free( fts_ast_string_t* ast_str); /** Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul @param[in] str string to translate @param[in] base the base @return translated number */ ulint fts_ast_string_to_ul( const fts_ast_string_t* ast_str, int base); /* String of length len. We always store the string of length len with a terminating '\0', regardless of there is any 0x00 in the string itself */ struct fts_ast_string_t { /*!< Pointer to string. */ byte* str; /*!< Length of the string. */ ulint len; }; /* Query term type */ struct fts_ast_term_t { fts_ast_string_t* ptr; /*!< Pointer to term string.*/ ibool wildcard; /*!< TRUE if wild card set.*/ }; /* Query text type */ struct fts_ast_text_t { fts_ast_string_t* ptr; /*!< Pointer to text string.*/ ulint distance; /*!< > 0 if proximity distance set */ }; /* The list of nodes in an expr list */ struct fts_ast_list_t { fts_ast_node_t* head; /*!< Children list head */ fts_ast_node_t* tail; /*!< Children list tail */ }; /* FTS AST node to store the term, text, operator and sub-expressions.*/ struct fts_ast_node_t { fts_ast_type_t type; /*!< The type of node */ fts_ast_text_t text; /*!< Text node */ fts_ast_term_t term; /*!< Term node */ fts_ast_oper_t oper; /*!< Operator value */ fts_ast_list_t list; /*!< Expression list */ fts_ast_node_t* next; /*!< Link for expr list */ fts_ast_node_t* next_alloc; /*!< For tracking allocations */ bool visited; /*!< whether this node is already processed */ /** current transaction */ const trx_t* trx; /* Used by plugin parser */ fts_ast_node_t* up_node; /*!< Direct up node */ bool go_up; /*!< Flag if go one level up */ }; /* To track state during parsing */ struct fts_ast_state_t { mem_heap_t* heap; /*!< Heap to use for alloc */ fts_ast_node_t* root; /*!< If all goes OK, then this will point to the root.*/ fts_ast_list_t list; /*!< List of nodes allocated */ fts_lexer_t* lexer; /*!< Lexer callback + arg */ CHARSET_INFO* charset; /*!< charset used for tokenization */ /* Used by plugin parser */ fts_ast_node_t* cur_node; /*!< Current node into which we add new node */ int depth; /*!< Depth of parsing state */ }; /******************************************************************//** Create an AST term node, makes a copy of ptr for plugin parser @return node */ extern fts_ast_node_t* fts_ast_create_node_term_for_parser( /*==========i=====================*/ void* arg, /*!< in: ast state */ const char* ptr, /*!< in: term string */ const ulint len); /*!< in: term string length */ /******************************************************************//** Create an AST phrase list node for plugin parser @return node */ extern fts_ast_node_t* fts_ast_create_node_phrase_list( /*============================*/ void* arg); /*!< in: ast state */ #ifdef UNIV_DEBUG const char* fts_ast_node_type_get(fts_ast_type_t type); #endif /* UNIV_DEBUG */ #endif /* INNOBASE_FSTS0AST_H */