diff options
Diffstat (limited to 'sql/item_cmpfunc.cc')
-rw-r--r-- | sql/item_cmpfunc.cc | 241 |
1 files changed, 139 insertions, 102 deletions
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 676e80ae79b..2f1592a5f3d 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -5034,156 +5034,193 @@ bool Item_func_like::find_selective_predicates_list_processor(uchar *arg) } + +/** + Convert string to lib_charset, if needed. +*/ +String *Regexp_processor_pcre::convert_if_needed(String *str, String *converter) +{ + if (m_conversion_is_needed) + { + uint dummy_errors; + if (converter->copy(str->ptr(), str->length(), str->charset(), + m_library_charset, &dummy_errors)) + return NULL; + str= converter; + } + return str; +} + + /** @brief Compile regular expression. + @param[in] pattern the pattern to compile from. @param[in] send_error send error message if any. @details Make necessary character set conversion then compile regular expression passed in the args[1]. - @retval 0 success. - @retval 1 error occurred. - @retval -1 given null regular expression. + @retval false success. + @retval true error occurred. */ -int Item_func_regex::regcomp(bool send_error) +bool Regexp_processor_pcre::compile(String *pattern, bool send_error) { - char buff[MAX_FIELD_WIDTH]; - String tmp(buff,sizeof(buff),&my_charset_bin); - String *res= args[1]->val_str(&tmp); - int error; + const char *pcreErrorStr; + int pcreErrorOffset; - if (args[1]->null_value) - return -1; - - if (regex_compiled) + if (is_compiled()) { - if (!stringcmp(res, &prev_regexp)) - return 0; - prev_regexp.copy(*res); - my_regfree(&preg); - regex_compiled= 0; + if (!stringcmp(pattern, &m_prev_pattern)) + return false; + m_prev_pattern.copy(*pattern); + pcre_free(m_pcre); + m_pcre= NULL; } - if (cmp_collation.collation != regex_lib_charset) - { - /* Convert UCS2 strings to UTF8 */ - uint dummy_errors; - if (conv.copy(res->ptr(), res->length(), res->charset(), - regex_lib_charset, &dummy_errors)) - return 1; - res= &conv; - } + if (!(pattern= convert_if_needed(pattern, &pattern_converter))) + return true; + + m_pcre= pcre_compile(pattern->c_ptr_safe(), m_library_flags, + &pcreErrorStr, &pcreErrorOffset, NULL); - if ((error= my_regcomp(&preg, res->c_ptr_safe(), - regex_lib_flags, regex_lib_charset))) + if (m_pcre == NULL) { if (send_error) { - (void) my_regerror(error, &preg, buff, sizeof(buff)); + char buff[MAX_FIELD_WIDTH]; + my_snprintf(buff, sizeof(buff), "%s at offset %d", pcreErrorStr, pcreErrorOffset); my_error(ER_REGEXP_ERROR, MYF(0), buff); } - return 1; + return true; } - regex_compiled= 1; - return 0; + return false; } -bool -Item_func_regex::fix_fields(THD *thd, Item **ref) +bool Regexp_processor_pcre::compile(Item *item, bool send_error) { - DBUG_ASSERT(fixed == 0); - if ((!args[0]->fixed && - args[0]->fix_fields(thd, args)) || args[0]->check_cols(1) || - (!args[1]->fixed && - args[1]->fix_fields(thd, args + 1)) || args[1]->check_cols(1)) - return TRUE; /* purecov: inspected */ - with_sum_func=args[0]->with_sum_func || args[1]->with_sum_func; - with_field= args[0]->with_field || args[1]->with_field; - with_subselect= args[0]->has_subquery() || args[1]->has_subquery(); - max_length= 1; - decimals= 0; + char buff[MAX_FIELD_WIDTH]; + String tmp(buff, sizeof(buff), &my_charset_bin); + String *pattern= item->val_str(&tmp); + if (item->null_value || compile(pattern, send_error)) + return true; + return false; +} - if (agg_arg_charsets_for_comparison(cmp_collation, args, 2)) - return TRUE; - regex_lib_flags= (cmp_collation.collation->state & - (MY_CS_BINSORT | MY_CS_CSSORT)) ? - REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE; - /* - If the case of UCS2 and other non-ASCII character sets, - we will convert patterns and strings to UTF8. - */ - regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ? - &my_charset_utf8_general_ci : - cmp_collation.collation; +bool Regexp_processor_pcre::exec(const char *str, int length, int offset) +{ + m_pcre_exec_rc= pcre_exec(m_pcre, NULL, str, length, + offset, 0, m_SubStrVec, m_subpatterns_needed * 3); + return false; +} - used_tables_cache=args[0]->used_tables() | args[1]->used_tables(); - not_null_tables_cache= (args[0]->not_null_tables() | - args[1]->not_null_tables()); - const_item_cache=args[0]->const_item() && args[1]->const_item(); - if (!regex_compiled && args[1]->const_item()) + +bool Regexp_processor_pcre::exec(String *str, int offset, + uint n_result_offsets_to_convert) +{ + if (!(str= convert_if_needed(str, &subject_converter))) + return true; + m_pcre_exec_rc= pcre_exec(m_pcre, NULL, str->c_ptr_safe(), str->length(), + offset, 0, m_SubStrVec, m_subpatterns_needed * 3); + if (m_pcre_exec_rc > 0) { - int comp_res= regcomp(TRUE); - if (comp_res == -1) - { // Will always return NULL - maybe_null=1; - fixed= 1; - return FALSE; + uint i; + for (i= 0; i < n_result_offsets_to_convert; i++) + { + /* + Convert byte offset into character offset. + */ + m_SubStrVec[i]= (int) str->charset()->cset->numchars(str->charset(), + str->ptr(), + str->ptr() + + m_SubStrVec[i]); } - else if (comp_res) - return TRUE; - regex_is_const= 1; - maybe_null= args[0]->maybe_null; } - else - maybe_null=1; - fixed= 1; - return FALSE; + return false; } -longlong Item_func_regex::val_int() +bool Regexp_processor_pcre::exec(Item *item, int offset, + uint n_result_offsets_to_convert) { - DBUG_ASSERT(fixed == 1); char buff[MAX_FIELD_WIDTH]; String tmp(buff,sizeof(buff),&my_charset_bin); - String *res= args[0]->val_str(&tmp); + String *res= item->val_str(&tmp); + if (item->null_value) + return true; + return exec(res, offset, n_result_offsets_to_convert); +} - if ((null_value= (args[0]->null_value || - (!regex_is_const && regcomp(FALSE))))) - return 0; - if (cmp_collation.collation != regex_lib_charset) +void Regexp_processor_pcre::fix_owner(Item_func *owner, + Item *subject_arg, + Item *pattern_arg) +{ + if (!is_compiled() && pattern_arg->const_item()) { - /* Convert UCS2 strings to UTF8 */ - uint dummy_errors; - if (conv.copy(res->ptr(), res->length(), res->charset(), - regex_lib_charset, &dummy_errors)) + if (compile(pattern_arg, true)) { - null_value= 1; - return 0; + owner->maybe_null= 1; // Will always return NULL + return; } - res= &conv; + set_const(true); + owner->maybe_null= subject_arg->maybe_null; } - return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1; + else + owner->maybe_null= 1; } -void Item_func_regex::cleanup() +void +Item_func_regex::fix_length_and_dec() { - DBUG_ENTER("Item_func_regex::cleanup"); - Item_bool_func::cleanup(); - if (regex_compiled) - { - my_regfree(&preg); - regex_compiled=0; - prev_regexp.length(0); - } - DBUG_VOID_RETURN; + Item_bool_func::fix_length_and_dec(); + + if (agg_arg_charsets_for_comparison(cmp_collation, args, 2)) + return; + + re.init(cmp_collation.collation, 0, 0); + re.fix_owner(this, args[0], args[1]); +} + + +longlong Item_func_regex::val_int() +{ + DBUG_ASSERT(fixed == 1); + if ((null_value= re.recompile(args[1]))) + return 0; + + if ((null_value= re.exec(args[0], 0, 0))) + return 0; + + return re.match(); +} + + +void +Item_func_regexp_instr::fix_length_and_dec() +{ + if (agg_arg_charsets_for_comparison(cmp_collation, args, 2)) + return; + + re.init(cmp_collation.collation, 0, 1); + re.fix_owner(this, args[0], args[1]); +} + + +longlong Item_func_regexp_instr::val_int() +{ + DBUG_ASSERT(fixed == 1); + if ((null_value= re.recompile(args[1]))) + return 0; + + if ((null_value= re.exec(args[0], 0, 1))) + return 0; + + return re.match() ? re.subpattern_start(0) + 1 : 0; } |