summaryrefslogtreecommitdiff
path: root/sql/item_cmpfunc.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/item_cmpfunc.cc')
-rw-r--r--sql/item_cmpfunc.cc241
1 files changed, 139 insertions, 102 deletions
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 676e80ae79b..2f1592a5f3d 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -5034,156 +5034,193 @@ bool Item_func_like::find_selective_predicates_list_processor(uchar *arg)
}
+
+/**
+ Convert string to lib_charset, if needed.
+*/
+String *Regexp_processor_pcre::convert_if_needed(String *str, String *converter)
+{
+ if (m_conversion_is_needed)
+ {
+ uint dummy_errors;
+ if (converter->copy(str->ptr(), str->length(), str->charset(),
+ m_library_charset, &dummy_errors))
+ return NULL;
+ str= converter;
+ }
+ return str;
+}
+
+
/**
@brief Compile regular expression.
+ @param[in] pattern the pattern to compile from.
@param[in] send_error send error message if any.
@details Make necessary character set conversion then
compile regular expression passed in the args[1].
- @retval 0 success.
- @retval 1 error occurred.
- @retval -1 given null regular expression.
+ @retval false success.
+ @retval true error occurred.
*/
-int Item_func_regex::regcomp(bool send_error)
+bool Regexp_processor_pcre::compile(String *pattern, bool send_error)
{
- char buff[MAX_FIELD_WIDTH];
- String tmp(buff,sizeof(buff),&my_charset_bin);
- String *res= args[1]->val_str(&tmp);
- int error;
+ const char *pcreErrorStr;
+ int pcreErrorOffset;
- if (args[1]->null_value)
- return -1;
-
- if (regex_compiled)
+ if (is_compiled())
{
- if (!stringcmp(res, &prev_regexp))
- return 0;
- prev_regexp.copy(*res);
- my_regfree(&preg);
- regex_compiled= 0;
+ if (!stringcmp(pattern, &m_prev_pattern))
+ return false;
+ m_prev_pattern.copy(*pattern);
+ pcre_free(m_pcre);
+ m_pcre= NULL;
}
- if (cmp_collation.collation != regex_lib_charset)
- {
- /* Convert UCS2 strings to UTF8 */
- uint dummy_errors;
- if (conv.copy(res->ptr(), res->length(), res->charset(),
- regex_lib_charset, &dummy_errors))
- return 1;
- res= &conv;
- }
+ if (!(pattern= convert_if_needed(pattern, &pattern_converter)))
+ return true;
+
+ m_pcre= pcre_compile(pattern->c_ptr_safe(), m_library_flags,
+ &pcreErrorStr, &pcreErrorOffset, NULL);
- if ((error= my_regcomp(&preg, res->c_ptr_safe(),
- regex_lib_flags, regex_lib_charset)))
+ if (m_pcre == NULL)
{
if (send_error)
{
- (void) my_regerror(error, &preg, buff, sizeof(buff));
+ char buff[MAX_FIELD_WIDTH];
+ my_snprintf(buff, sizeof(buff), "%s at offset %d", pcreErrorStr, pcreErrorOffset);
my_error(ER_REGEXP_ERROR, MYF(0), buff);
}
- return 1;
+ return true;
}
- regex_compiled= 1;
- return 0;
+ return false;
}
-bool
-Item_func_regex::fix_fields(THD *thd, Item **ref)
+bool Regexp_processor_pcre::compile(Item *item, bool send_error)
{
- DBUG_ASSERT(fixed == 0);
- if ((!args[0]->fixed &&
- args[0]->fix_fields(thd, args)) || args[0]->check_cols(1) ||
- (!args[1]->fixed &&
- args[1]->fix_fields(thd, args + 1)) || args[1]->check_cols(1))
- return TRUE; /* purecov: inspected */
- with_sum_func=args[0]->with_sum_func || args[1]->with_sum_func;
- with_field= args[0]->with_field || args[1]->with_field;
- with_subselect= args[0]->has_subquery() || args[1]->has_subquery();
- max_length= 1;
- decimals= 0;
+ char buff[MAX_FIELD_WIDTH];
+ String tmp(buff, sizeof(buff), &my_charset_bin);
+ String *pattern= item->val_str(&tmp);
+ if (item->null_value || compile(pattern, send_error))
+ return true;
+ return false;
+}
- if (agg_arg_charsets_for_comparison(cmp_collation, args, 2))
- return TRUE;
- regex_lib_flags= (cmp_collation.collation->state &
- (MY_CS_BINSORT | MY_CS_CSSORT)) ?
- REG_EXTENDED | REG_NOSUB :
- REG_EXTENDED | REG_NOSUB | REG_ICASE;
- /*
- If the case of UCS2 and other non-ASCII character sets,
- we will convert patterns and strings to UTF8.
- */
- regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ?
- &my_charset_utf8_general_ci :
- cmp_collation.collation;
+bool Regexp_processor_pcre::exec(const char *str, int length, int offset)
+{
+ m_pcre_exec_rc= pcre_exec(m_pcre, NULL, str, length,
+ offset, 0, m_SubStrVec, m_subpatterns_needed * 3);
+ return false;
+}
- used_tables_cache=args[0]->used_tables() | args[1]->used_tables();
- not_null_tables_cache= (args[0]->not_null_tables() |
- args[1]->not_null_tables());
- const_item_cache=args[0]->const_item() && args[1]->const_item();
- if (!regex_compiled && args[1]->const_item())
+
+bool Regexp_processor_pcre::exec(String *str, int offset,
+ uint n_result_offsets_to_convert)
+{
+ if (!(str= convert_if_needed(str, &subject_converter)))
+ return true;
+ m_pcre_exec_rc= pcre_exec(m_pcre, NULL, str->c_ptr_safe(), str->length(),
+ offset, 0, m_SubStrVec, m_subpatterns_needed * 3);
+ if (m_pcre_exec_rc > 0)
{
- int comp_res= regcomp(TRUE);
- if (comp_res == -1)
- { // Will always return NULL
- maybe_null=1;
- fixed= 1;
- return FALSE;
+ uint i;
+ for (i= 0; i < n_result_offsets_to_convert; i++)
+ {
+ /*
+ Convert byte offset into character offset.
+ */
+ m_SubStrVec[i]= (int) str->charset()->cset->numchars(str->charset(),
+ str->ptr(),
+ str->ptr() +
+ m_SubStrVec[i]);
}
- else if (comp_res)
- return TRUE;
- regex_is_const= 1;
- maybe_null= args[0]->maybe_null;
}
- else
- maybe_null=1;
- fixed= 1;
- return FALSE;
+ return false;
}
-longlong Item_func_regex::val_int()
+bool Regexp_processor_pcre::exec(Item *item, int offset,
+ uint n_result_offsets_to_convert)
{
- DBUG_ASSERT(fixed == 1);
char buff[MAX_FIELD_WIDTH];
String tmp(buff,sizeof(buff),&my_charset_bin);
- String *res= args[0]->val_str(&tmp);
+ String *res= item->val_str(&tmp);
+ if (item->null_value)
+ return true;
+ return exec(res, offset, n_result_offsets_to_convert);
+}
- if ((null_value= (args[0]->null_value ||
- (!regex_is_const && regcomp(FALSE)))))
- return 0;
- if (cmp_collation.collation != regex_lib_charset)
+void Regexp_processor_pcre::fix_owner(Item_func *owner,
+ Item *subject_arg,
+ Item *pattern_arg)
+{
+ if (!is_compiled() && pattern_arg->const_item())
{
- /* Convert UCS2 strings to UTF8 */
- uint dummy_errors;
- if (conv.copy(res->ptr(), res->length(), res->charset(),
- regex_lib_charset, &dummy_errors))
+ if (compile(pattern_arg, true))
{
- null_value= 1;
- return 0;
+ owner->maybe_null= 1; // Will always return NULL
+ return;
}
- res= &conv;
+ set_const(true);
+ owner->maybe_null= subject_arg->maybe_null;
}
- return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1;
+ else
+ owner->maybe_null= 1;
}
-void Item_func_regex::cleanup()
+void
+Item_func_regex::fix_length_and_dec()
{
- DBUG_ENTER("Item_func_regex::cleanup");
- Item_bool_func::cleanup();
- if (regex_compiled)
- {
- my_regfree(&preg);
- regex_compiled=0;
- prev_regexp.length(0);
- }
- DBUG_VOID_RETURN;
+ Item_bool_func::fix_length_and_dec();
+
+ if (agg_arg_charsets_for_comparison(cmp_collation, args, 2))
+ return;
+
+ re.init(cmp_collation.collation, 0, 0);
+ re.fix_owner(this, args[0], args[1]);
+}
+
+
+longlong Item_func_regex::val_int()
+{
+ DBUG_ASSERT(fixed == 1);
+ if ((null_value= re.recompile(args[1])))
+ return 0;
+
+ if ((null_value= re.exec(args[0], 0, 0)))
+ return 0;
+
+ return re.match();
+}
+
+
+void
+Item_func_regexp_instr::fix_length_and_dec()
+{
+ if (agg_arg_charsets_for_comparison(cmp_collation, args, 2))
+ return;
+
+ re.init(cmp_collation.collation, 0, 1);
+ re.fix_owner(this, args[0], args[1]);
+}
+
+
+longlong Item_func_regexp_instr::val_int()
+{
+ DBUG_ASSERT(fixed == 1);
+ if ((null_value= re.recompile(args[1])))
+ return 0;
+
+ if ((null_value= re.exec(args[0], 0, 1)))
+ return 0;
+
+ return re.match() ? re.subpattern_start(0) + 1 : 0;
}