summary refs log tree commit diff
path: root/sql
diff options
context:
space:
mode:
author Alexander Barkov <bar@mnogosearch.org> 2013-09-26 18:02:17 +0400
committer Alexander Barkov <bar@mnogosearch.org> 2013-09-26 18:02:17 +0400
commit 285e7aa179a6081531be3274772b89e8989fd107 (patch)
tree 71836ea6e49f48fff6e957bcef38628e7b979001 /sql
parent 9d83468e78ba23f024ce3c11443913ad75cf1ea5 (diff)
download mariadb-git-285e7aa179a6081531be3274772b89e8989fd107.tar.gz
MDEV-4425 REGEXP enhancements
Diffstat (limited to 'sql')
-rw-r--r-- sql/CMakeLists.txt 4
-rw-r--r-- sql/item_cmpfunc.cc 241
-rw-r--r-- sql/item_cmpfunc.h 124
-rw-r--r-- sql/item_create.cc 69
-rw-r--r-- sql/item_strfunc.cc 181
-rw-r--r-- sql/item_strfunc.h 43
-rw-r--r-- sql/mysqld.cc 6
-rw-r--r-- sql/sql_string.h 5
8 files changed, 553 insertions, 120 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 2dfdfa2154e..408b1ce8c68 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -16,7 +16,7 @@
INCLUDE_DIRECTORIES(
${CMAKE_SOURCE_DIR}/include
${CMAKE_SOURCE_DIR}/sql
-${CMAKE_SOURCE_DIR}/regex
+${CMAKE_SOURCE_DIR}/pcre
${ZLIB_INCLUDE_DIR}
${SSL_INCLUDE_DIRS}
${CMAKE_BINARY_DIR}/sql
@@ -106,7 +106,7 @@ ADD_LIBRARY(sql STATIC ${SQL_SOURCE})
ADD_DEPENDENCIES(sql GenServerSource)
DTRACE_INSTRUMENT(sql)
TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS}
- mysys dbug strings vio regex ${LIBJEMALLOC}
+ mysys dbug strings vio pcre ${LIBJEMALLOC}
${LIBWRAP} ${LIBCRYPT} ${LIBDL} ${CMAKE_THREAD_LIBS_INIT}
${SSL_LIBRARIES})
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 676e80ae79b..2f1592a5f3d 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -5034,156 +5034,193 @@ bool Item_func_like::find_selective_predicates_list_processor(uchar *arg)
}
+
+/**
+  Re-encode a string into the regex library character set when the
+  processor has conversion switched on.
+
+  @param str        the string about to be handed to the library.
+  @param converter  scratch String that receives the re-encoded copy.
+
+  @return str itself when no conversion is needed, converter when the
+          copy succeeded, NULL when the copy failed.
+*/
+String *Regexp_processor_pcre::convert_if_needed(String *str, String *converter)
+{
+  if (!m_conversion_is_needed)
+    return str;
+
+  uint dummy_errors;
+  const bool copy_failed= converter->copy(str->ptr(), str->length(),
+                                          str->charset(), m_library_charset,
+                                          &dummy_errors);
+  return copy_failed ? NULL : converter;
+}
+
+
/**
@brief Compile regular expression.
+ @param[in] pattern the pattern to compile from.
@param[in] send_error send error message if any.
@details Make necessary character set conversion then
compile regular expression passed in the args[1].
- @retval 0 success.
- @retval 1 error occurred.
- @retval -1 given null regular expression.
+ @retval false success.
+ @retval true error occurred.
*/
-int Item_func_regex::regcomp(bool send_error)
+bool Regexp_processor_pcre::compile(String *pattern, bool send_error)
{
- char buff[MAX_FIELD_WIDTH];
- String tmp(buff,sizeof(buff),&my_charset_bin);
- String *res= args[1]->val_str(&tmp);
- int error;
+ const char *pcreErrorStr;
+ int pcreErrorOffset;
- if (args[1]->null_value)
- return -1;
-
- if (regex_compiled)
+ if (is_compiled())
{
- if (!stringcmp(res, &prev_regexp))
- return 0;
- prev_regexp.copy(*res);
- my_regfree(&preg);
- regex_compiled= 0;
+ if (!stringcmp(pattern, &m_prev_pattern))
+ return false;
+ m_prev_pattern.copy(*pattern);
+ pcre_free(m_pcre);
+ m_pcre= NULL;
}
- if (cmp_collation.collation != regex_lib_charset)
- {
- /* Convert UCS2 strings to UTF8 */
- uint dummy_errors;
- if (conv.copy(res->ptr(), res->length(), res->charset(),
- regex_lib_charset, &dummy_errors))
- return 1;
- res= &conv;
- }
+ if (!(pattern= convert_if_needed(pattern, &pattern_converter)))
+ return true;
+
+ m_pcre= pcre_compile(pattern->c_ptr_safe(), m_library_flags,
+ &pcreErrorStr, &pcreErrorOffset, NULL);
- if ((error= my_regcomp(&preg, res->c_ptr_safe(),
- regex_lib_flags, regex_lib_charset)))
+ if (m_pcre == NULL)
{
if (send_error)
{
- (void) my_regerror(error, &preg, buff, sizeof(buff));
+ char buff[MAX_FIELD_WIDTH];
+ my_snprintf(buff, sizeof(buff), "%s at offset %d", pcreErrorStr, pcreErrorOffset);
my_error(ER_REGEXP_ERROR, MYF(0), buff);
}
- return 1;
+ return true;
}
- regex_compiled= 1;
- return 0;
+ return false;
}
-bool
-Item_func_regex::fix_fields(THD *thd, Item **ref)
+bool Regexp_processor_pcre::compile(Item *item, bool send_error)
{
- DBUG_ASSERT(fixed == 0);
- if ((!args[0]->fixed &&
- args[0]->fix_fields(thd, args)) || args[0]->check_cols(1) ||
- (!args[1]->fixed &&
- args[1]->fix_fields(thd, args + 1)) || args[1]->check_cols(1))
- return TRUE; /* purecov: inspected */
- with_sum_func=args[0]->with_sum_func || args[1]->with_sum_func;
- with_field= args[0]->with_field || args[1]->with_field;
- with_subselect= args[0]->has_subquery() || args[1]->has_subquery();
- max_length= 1;
- decimals= 0;
+ char buff[MAX_FIELD_WIDTH];
+ String tmp(buff, sizeof(buff), &my_charset_bin);
+ String *pattern= item->val_str(&tmp);
+ if (item->null_value || compile(pattern, send_error))
+ return true;
+ return false;
+}
- if (agg_arg_charsets_for_comparison(cmp_collation, args, 2))
- return TRUE;
- regex_lib_flags= (cmp_collation.collation->state &
- (MY_CS_BINSORT | MY_CS_CSSORT)) ?
- REG_EXTENDED | REG_NOSUB :
- REG_EXTENDED | REG_NOSUB | REG_ICASE;
- /*
- If the case of UCS2 and other non-ASCII character sets,
- we will convert patterns and strings to UTF8.
- */
- regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ?
- &my_charset_utf8_general_ci :
- cmp_collation.collation;
+/**
+  Run the compiled pattern against a raw buffer.
+
+  The pcre_exec() return code is stored in m_pcre_exec_rc and the offsets
+  in m_SubStrVec; no character set conversion is done here.
+
+  @param str     subject buffer.
+  @param length  length of the subject buffer, as passed to pcre_exec().
+  @param offset  start offset inside the subject, as passed to pcre_exec().
+
+  @return always false.
+*/
+bool Regexp_processor_pcre::exec(const char *str, int length, int offset)
+{
+  /* pcre_exec() wants three ints per requested sub-pattern */
+  const int ovector_size= m_subpatterns_needed * 3;
+  m_pcre_exec_rc= pcre_exec(m_pcre, NULL, str, length, offset, 0,
+                            m_SubStrVec, ovector_size);
+  return false;
+}
- used_tables_cache=args[0]->used_tables() | args[1]->used_tables();
- not_null_tables_cache= (args[0]->not_null_tables() |
- args[1]->not_null_tables());
- const_item_cache=args[0]->const_item() && args[1]->const_item();
- if (!regex_compiled && args[1]->const_item())
+
+bool Regexp_processor_pcre::exec(String *str, int offset,
+ uint n_result_offsets_to_convert)
+{
+ if (!(str= convert_if_needed(str, &subject_converter)))
+ return true;
+ m_pcre_exec_rc= pcre_exec(m_pcre, NULL, str->c_ptr_safe(), str->length(),
+ offset, 0, m_SubStrVec, m_subpatterns_needed * 3);
+ if (m_pcre_exec_rc > 0)
{
- int comp_res= regcomp(TRUE);
- if (comp_res == -1)
- { // Will always return NULL
- maybe_null=1;
- fixed= 1;
- return FALSE;
+ uint i;
+ for (i= 0; i < n_result_offsets_to_convert; i++)
+ {
+ /*
+ Convert byte offset into character offset.
+ */
+ m_SubStrVec[i]= (int) str->charset()->cset->numchars(str->charset(),
+ str->ptr(),
+ str->ptr() +
+ m_SubStrVec[i]);
}
- else if (comp_res)
- return TRUE;
- regex_is_const= 1;
- maybe_null= args[0]->maybe_null;
}
- else
- maybe_null=1;
- fixed= 1;
- return FALSE;
+ return false;
}
-longlong Item_func_regex::val_int()
+bool Regexp_processor_pcre::exec(Item *item, int offset,
+ uint n_result_offsets_to_convert)
{
- DBUG_ASSERT(fixed == 1);
char buff[MAX_FIELD_WIDTH];
String tmp(buff,sizeof(buff),&my_charset_bin);
- String *res= args[0]->val_str(&tmp);
+ String *res= item->val_str(&tmp);
+ if (item->null_value)
+ return true;
+ return exec(res, offset, n_result_offsets_to_convert);
+}
- if ((null_value= (args[0]->null_value ||
- (!regex_is_const && regcomp(FALSE)))))
- return 0;
- if (cmp_collation.collation != regex_lib_charset)
+void Regexp_processor_pcre::fix_owner(Item_func *owner,
+ Item *subject_arg,
+ Item *pattern_arg)
+{
+ if (!is_compiled() && pattern_arg->const_item())
{
- /* Convert UCS2 strings to UTF8 */
- uint dummy_errors;
- if (conv.copy(res->ptr(), res->length(), res->charset(),
- regex_lib_charset, &dummy_errors))
+ if (compile(pattern_arg, true))
{
- null_value= 1;
- return 0;
+ owner->maybe_null= 1; // Will always return NULL
+ return;
}
- res= &conv;
+ set_const(true);
+ owner->maybe_null= subject_arg->maybe_null;
}
- return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1;
+ else
+ owner->maybe_null= 1;
}
-void Item_func_regex::cleanup()
+void
+Item_func_regex::fix_length_and_dec()
{
- DBUG_ENTER("Item_func_regex::cleanup");
- Item_bool_func::cleanup();
- if (regex_compiled)
- {
- my_regfree(&preg);
- regex_compiled=0;
- prev_regexp.length(0);
- }
- DBUG_VOID_RETURN;
+ Item_bool_func::fix_length_and_dec();
+
+ if (agg_arg_charsets_for_comparison(cmp_collation, args, 2))
+ return;
+
+ re.init(cmp_collation.collation, 0, 0);
+ re.fix_owner(this, args[0], args[1]);
+}
+
+
+/**
+  Evaluate "args[0] REGEXP args[1]".
+
+  @return the result of re.match(); 0 with null_value set when
+          recompiling the pattern or executing it reports an error.
+*/
+longlong Item_func_regex::val_int()
+{
+  DBUG_ASSERT(fixed == 1);
+
+  /* Refresh the compiled pattern first, then run it on the subject */
+  null_value= re.recompile(args[1]);
+  if (!null_value)
+    null_value= re.exec(args[0], 0, 0);
+
+  if (null_value)
+    return 0;
+  return re.match();
+}
+
+
+/**
+  Aggregate the comparison collation of both arguments and, when that
+  succeeds, set up the regex processor with one sub-pattern requested.
+*/
+void
+Item_func_regexp_instr::fix_length_and_dec()
+{
+  const bool aggregation_failed=
+    agg_arg_charsets_for_comparison(cmp_collation, args, 2);
+  if (!aggregation_failed)
+  {
+    re.init(cmp_collation.collation, 0, 1);
+    re.fix_owner(this, args[0], args[1]);
+  }
+}
+
+
+/**
+  Evaluate REGEXP_INSTR(args[0], args[1]).
+
+  @return re.subpattern_start(0) + 1 when the pattern matched, 0 when it
+          did not; 0 with null_value set when recompiling or executing
+          the pattern reports an error.
+*/
+longlong Item_func_regexp_instr::val_int()
+{
+  DBUG_ASSERT(fixed == 1);
+
+  null_value= re.recompile(args[1]);
+  if (null_value)
+    return 0;
+
+  /* Ask exec() to convert the first result offset */
+  null_value= re.exec(args[0], 0, 1);
+  if (null_value)
+    return 0;
+
+  if (!re.match())
+    return 0;
+  return re.subpattern_start(0) + 1;
+}
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index 19c43f9e55b..fbe752551df 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -25,7 +25,7 @@
#include "thr_malloc.h" /* sql_calloc */
#include "item_func.h" /* Item_int_func, Item_bool_func */
-#include "my_regex.h"
+#include <pcre.h> /* pcre header file */
extern Item_result item_cmp_type(Item_result a,Item_result b);
class Item_bool_func2;
@@ -1484,23 +1484,101 @@ public:
};
+/**
+  Wrapper around one compiled PCRE pattern.
+
+  Keeps the compiled pattern, the character set conversion settings chosen
+  by init(), and the outcome of the last pcre_exec() call (its return code
+  plus the vector of sub-pattern offsets).
+*/
+class Regexp_processor_pcre
+{
+  pcre *m_pcre;                     // Compiled pattern, NULL when none
+  bool m_conversion_is_needed;      // Re-encode strings to m_library_charset
+  bool m_is_const;                  // Changed through set_const()
+  int m_library_flags;              // Options passed to pcre_compile()
+  CHARSET_INFO *m_data_charset;     // Character set given to init()
+  CHARSET_INFO *m_library_charset;  // Character set strings are converted to
+  String m_prev_pattern;            // Pattern text remembered by compile()
+  int m_pcre_exec_rc;               // Return code of the last pcre_exec()
+  int m_SubStrVec[30];              // Offset vector filled by pcre_exec()
+  uint m_subpatterns_needed;        // Offset pairs requested from pcre_exec()
+public:
+  String *convert_if_needed(String *src, String *converter);
+  String subject_converter;
+  String pattern_converter;
+  String replace_converter;
+  Regexp_processor_pcre() :
+    m_pcre(NULL), m_conversion_is_needed(true), m_is_const(0),
+    m_library_flags(0),
+    m_data_charset(&my_charset_utf8_general_ci),
+    m_library_charset(&my_charset_utf8_general_ci),
+    /*
+      Start in the "no match" state, so that match() and the sub-pattern
+      accessors are well defined before the first exec() call.
+    */
+    m_pcre_exec_rc(PCRE_ERROR_NOMATCH),
+    m_subpatterns_needed(0)
+  {}
+  /**
+    Choose library flags and conversion rules for a data character set.
+
+    @param data_charset  character set/collation of the SQL arguments.
+    @param extra_flags   additional PCRE options OR-ed into the flags.
+    @param nsubpatterns  number of offset pairs to request from pcre_exec().
+  */
+  void init(CHARSET_INFO *data_charset, int extra_flags, uint nsubpatterns)
+  {
+    /* Remember the data character set instead of leaving the default */
+    m_data_charset= data_charset;
+
+    m_library_flags= PCRE_UCP | extra_flags |
+                     (data_charset != &my_charset_bin ? PCRE_UTF8 : 0) |
+                     ((data_charset->state &
+                       (MY_CS_BINSORT | MY_CS_CSSORT)) ? 0 : PCRE_CASELESS);
+
+    // Convert text data to utf-8.
+    m_library_charset= data_charset == &my_charset_bin ?
+                       &my_charset_bin : &my_charset_utf8_general_ci;
+
+    m_conversion_is_needed= (data_charset != &my_charset_bin) &&
+                            !my_charset_same(data_charset, m_library_charset);
+    m_subpatterns_needed= nsubpatterns;
+  }
+  void fix_owner(Item_func *owner, Item *subject_arg, Item *pattern_arg);
+  bool compile(String *pattern, bool send_error);
+  bool compile(Item *item, bool send_error);
+  /** Compile again from "item" unless the pattern was marked constant. */
+  bool recompile(Item *item)
+  {
+    return !m_is_const && compile(item, false);
+  }
+  bool exec(const char *str, int length, int offset);
+  bool exec(String *str, int offset, uint n_result_offsets_to_convert);
+  bool exec(Item *item, int offset, uint n_result_offsets_to_convert);
+  /** True when the last pcre_exec() call did not return a negative code. */
+  bool match() const { return m_pcre_exec_rc < 0 ? 0 : 1; }
+  /**
+    Number of offset pairs available after the last pcre_exec() call.
+
+    pcre_exec() returns 0 when the pattern matched but the offset vector
+    was too small for all captured substrings; the vector is then filled
+    as far as possible, i.e. with the m_subpatterns_needed requested pairs.
+  */
+  int nsubpatterns() const
+  {
+    if (m_pcre_exec_rc < 0)
+      return 0;
+    return m_pcre_exec_rc == 0 ? (int) m_subpatterns_needed : m_pcre_exec_rc;
+  }
+  /** Start offset of sub-pattern n, 0 when that pair is not available. */
+  int subpattern_start(int n) const
+  {
+    return (n >= 0 && n < nsubpatterns()) ? m_SubStrVec[n * 2] : 0;
+  }
+  /** End offset of sub-pattern n, 0 when that pair is not available. */
+  int subpattern_end(int n) const
+  {
+    return (n >= 0 && n < nsubpatterns()) ? m_SubStrVec[n * 2 + 1] : 0;
+  }
+  int subpattern_length(int n) const
+  {
+    return subpattern_end(n) - subpattern_start(n);
+  }
+  /** Free the compiled pattern, if any, and forget the remembered text. */
+  void cleanup()
+  {
+    if (m_pcre)
+    {
+      pcre_free(m_pcre);
+      m_pcre= NULL;
+    }
+    m_prev_pattern.length(0);
+  }
+  bool is_compiled() const { return m_pcre != NULL; }
+  bool is_const() const { return m_is_const; }
+  void set_const(bool arg) { m_is_const= arg; }
+  CHARSET_INFO * library_charset() const { return m_library_charset; }
+};
+
+
class Item_func_regex :public Item_bool_func
{
- my_regex_t preg;
- bool regex_compiled;
- bool regex_is_const;
- String prev_regexp;
+ Regexp_processor_pcre re;
DTCollation cmp_collation;
- CHARSET_INFO *regex_lib_charset;
- int regex_lib_flags;
- String conv;
- int regcomp(bool send_error);
public:
- Item_func_regex(Item *a,Item *b) :Item_bool_func(a,b),
- regex_compiled(0),regex_is_const(0) {}
- void cleanup();
+ Item_func_regex(Item *a,Item *b) :Item_bool_func(a,b)
+ {}
+ void cleanup()
+ {
+ DBUG_ENTER("Item_func_regex::cleanup");
+ Item_bool_func::cleanup();
+ re.cleanup();
+ DBUG_VOID_RETURN;
+ }
longlong val_int();
- bool fix_fields(THD *thd, Item **ref);
+ void fix_length_and_dec();
const char *func_name() const { return "regexp"; }
virtual inline void print(String *str, enum_query_type query_type)
@@ -1512,6 +1590,26 @@ public:
};
+/*
+ Integer function item reported under the name "regexp_instr".
+ It owns a Regexp_processor_pcre and the collation aggregated for
+ comparing its two arguments.
+*/
+class Item_func_regexp_instr :public Item_int_func
+{
+ Regexp_processor_pcre re;
+ DTCollation cmp_collation;
+public:
+ Item_func_regexp_instr(Item *a, Item *b) :Item_int_func(a, b)
+ {}
+ /* Run the base class cleanup, then release the regex processor state. */
+ void cleanup()
+ {
+ DBUG_ENTER("Item_func_regexp_instr::cleanup");
+ Item_int_func::cleanup();
+ re.cleanup();
+ DBUG_VOID_RETURN;
+ }
+ longlong val_int();
+ void fix_length_and_dec();
+ const char *func_name() const { return "regexp_instr"; }
+};
+
+
typedef class Item COND;
class Item_cond :public Item_bool_func
diff --git a/sql/item_create.cc b/sql/item_create.cc
index ce4dc7ced8f..60eabe67c83 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -2014,6 +2014,45 @@ protected:
};
+/*
+ Builder registered in func_array under the name REGEXP_INSTR.
+ Derives from Create_func_arg2 and implements create_2_arg(); the only
+ instance is s_singleton, construction and destruction are protected.
+*/
+class Create_func_regexp_instr : public Create_func_arg2
+{
+public:
+ virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+ static Create_func_regexp_instr s_singleton;
+
+protected:
+ Create_func_regexp_instr() {}
+ virtual ~Create_func_regexp_instr() {}
+};
+
+
+/*
+ Builder registered in func_array under the name REGEXP_REPLACE.
+ Derives from Create_func_arg3 and implements create_3_arg(); the only
+ instance is s_singleton, construction and destruction are protected.
+*/
+class Create_func_regexp_replace : public Create_func_arg3
+{
+public:
+ virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+
+ static Create_func_regexp_replace s_singleton;
+
+protected:
+ Create_func_regexp_replace() {}
+ virtual ~Create_func_regexp_replace() {}
+};
+
+
+/*
+ Builder registered in func_array under the name REGEXP_SUBSTR.
+ Derives from Create_func_arg2 and implements create_2_arg(); the only
+ instance is s_singleton, construction and destruction are protected.
+*/
+class Create_func_regexp_substr : public Create_func_arg2
+{
+public:
+ virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+ static Create_func_regexp_substr s_singleton;
+
+protected:
+ Create_func_regexp_substr() {}
+ virtual ~Create_func_regexp_substr() {}
+};
+
+
class Create_func_radians : public Create_func_arg1
{
public:
@@ -4719,6 +4758,33 @@ Create_func_quote::create_1_arg(THD *thd, Item *arg1)
}
+/* The single builder instance for REGEXP_INSTR. */
+Create_func_regexp_instr Create_func_regexp_instr::s_singleton;
+
+/* Allocate an Item_func_regexp_instr for (arg1, arg2) on thd->mem_root. */
+Item*
+Create_func_regexp_instr::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+ return new (thd->mem_root) Item_func_regexp_instr(arg1, arg2);
+}
+
+
+/* The single builder instance for REGEXP_REPLACE. */
+Create_func_regexp_replace Create_func_regexp_replace::s_singleton;
+
+/* Allocate an Item_func_regexp_replace for (arg1, arg2, arg3) on thd->mem_root. */
+Item*
+Create_func_regexp_replace::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+{
+ return new (thd->mem_root) Item_func_regexp_replace(arg1, arg2, arg3);
+}
+
+
+/* The single builder instance for REGEXP_SUBSTR. */
+Create_func_regexp_substr Create_func_regexp_substr::s_singleton;
+
+/* Allocate an Item_func_regexp_substr for (arg1, arg2) on thd->mem_root. */
+Item*
+Create_func_regexp_substr::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+ return new (thd->mem_root) Item_func_regexp_substr(arg1, arg2);
+}
+
+
Create_func_radians Create_func_radians::s_singleton;
Item*
@@ -5514,6 +5580,9 @@ static Native_func_registry func_array[] =
{ { C_STRING_WITH_LEN("POW") }, BUILDER(Create_func_pow)},
{ { C_STRING_WITH_LEN("POWER") }, BUILDER(Create_func_pow)},
{ { C_STRING_WITH_LEN("QUOTE") }, BUILDER(Create_func_quote)},
+ { { C_STRING_WITH_LEN("REGEXP_INSTR") }, BUILDER(Create_func_regexp_instr)},
+ { { C_STRING_WITH_LEN("REGEXP_REPLACE") }, BUILDER(Create_func_regexp_replace)},
+ { { C_STRING_WITH_LEN("REGEXP_SUBSTR") }, BUILDER(Create_func_regexp_substr)},
{ { C_STRING_WITH_LEN("RADIANS") }, BUILDER(Create_func_radians)},
{ { C_STRING_WITH_LEN("RAND") }, BUILDER(Create_func_rand)},
{ { C_STRING_WITH_LEN("RELEASE_LOCK") }, BUILDER(Create_func_release_lock)},
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index fe67c1e81d1..02d64952459 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -1260,6 +1260,187 @@ void Item_func_replace::fix_length_and_dec()
}
+/*********************************************************************/
+/*
+  Aggregate the result collation over all three arguments and, when that
+  succeeds, set the maximum result length to MAX_BLOB_WIDTH and set up
+  the regex processor with ten sub-patterns requested.
+*/
+void Item_func_regexp_replace::fix_length_and_dec()
+{
+  const bool aggregation_failed=
+    agg_arg_charsets_for_string_result_with_comparison(collation, args, 3);
+  if (!aggregation_failed)
+  {
+    max_length= MAX_BLOB_WIDTH;
+    re.init(collation.collation, 0, 10);
+    re.fix_owner(this, args[0], args[1]);
+  }
+}
+
+
+/*
+  Traverse through the replacement string and append to "str".
+
+  The replacement is scanned character by character in the library
+  character set:
+  - a backslash followed by a digit 0..9 that names a sub-pattern
+    available from the last match is replaced with the corresponding
+    chunk of the source string;
+  - a backslash followed by any other character yields that character;
+  - scanning stops at the end of the replacement, or at the first
+    sequence mb_wc() does not decode (a trailing lone backslash is
+    therefore dropped).
+
+  @param str      destination string.
+  @param source   subject string the sub-pattern offsets refer to.
+  @param replace  replacement template.
+
+  @retval false   success.
+  @retval true    appending to "str" failed.
+*/
+bool Item_func_regexp_replace::append_replacement(String *str,
+                                                  const LEX_CSTRING *source,
+                                                  const LEX_CSTRING *replace)
+{
+  const char *beg= replace->str;
+  const char *end= beg + replace->length;
+  CHARSET_INFO *cs= re.library_charset();
+
+  for ( ; ; )
+  {
+    my_wc_t wc;
+    int cnv, n;
+
+    if ((cnv= cs->cset->mb_wc(cs, &wc, (const uchar *) beg,
+                              (const uchar *) end)) < 1)
+      break; /* End of line */
+    beg+= cnv;
+
+    if (wc != '\\')
+    {
+      /* Ordinary character: copy it as is */
+      if (str->append(beg - cnv, cnv, cs))
+        return true;
+      continue;
+    }
+
+    /* Look at the character following the backslash */
+    if ((cnv= cs->cset->mb_wc(cs, &wc, (const uchar *) beg,
+                              (const uchar *) end)) < 1)
+      break; /* End of line */
+    beg+= cnv;
+
+    if ((n= ((int) wc) - '0') >= 0 && n <= 9 && n < re.nsubpatterns())
+    {
+      /* A valid sub-pattern reference found */
+      int pbeg= re.subpattern_start(n), plength= re.subpattern_end(n) - pbeg;
+      /*
+        PCRE sets both offsets to -1 for a group that did not take part
+        in the match: there is nothing to copy then, and source->str + pbeg
+        would point before the buffer.
+      */
+      if (pbeg >= 0 && plength > 0 &&
+          str->append(source->str + pbeg, plength, cs))
+        return true;
+    }
+    else
+    {
+      /*
+         A non-digit character following after '\'.
+         Just add the character itself.
+       */
+      if (str->append(beg - cnv, cnv, cs))
+        return true; /* append failure is an error, as in the other branches */
+    }
+  }
+  return false;
+}
+
+
+/*
+  Evaluate REGEXP_REPLACE(args[0], args[1], args[2]).
+
+  Walks through the subject match by match, copying the text between
+  matches and the expanded replacement into "str". The walk ends at the
+  first failed or empty match, after which the rest of the subject is
+  copied unchanged.
+
+  Returns "str" on success. Returns NULL with null_value set when the
+  subject or the replacement is NULL, when the pattern cannot be
+  recompiled, or when a conversion or append step fails.
+*/
+String *Item_func_regexp_replace::val_str(String *str)
+{
+  DBUG_ASSERT(fixed == 1);
+  char subject_buf[MAX_FIELD_WIDTH];
+  char replace_buf[MAX_FIELD_WIDTH];
+  String subject_tmp(subject_buf, sizeof(subject_buf), &my_charset_bin);
+  String replace_tmp(replace_buf, sizeof(replace_buf), &my_charset_bin);
+  String *source= args[0]->val_str(&subject_tmp);
+  String *replace= args[2]->val_str(&replace_tmp);
+
+  null_value= args[0]->null_value || args[2]->null_value ||
+              re.recompile(args[1]);
+  if (null_value)
+    return (String *) 0;
+
+  /* Bring both strings into the library character set */
+  source= re.convert_if_needed(source, &re.subject_converter);
+  if (source)
+    replace= re.convert_if_needed(replace, &re.replace_converter);
+  if (!source || !replace)
+  {
+    null_value= true;
+    return (String *) 0;
+  }
+
+  LEX_CSTRING src= source->lex_cstring();
+  LEX_CSTRING rpl= replace->lex_cstring();
+
+  str->length(0);
+  str->set_charset(collation.collation);
+
+  /*
+    Iterate through all matches; "pos" is where the next search starts.
+    Every way of leaving this loop other than "return str" is an error.
+  */
+  for (int pos= 0;
+       !re.exec(src.str, src.length, pos);
+       pos= re.subpattern_end(0))
+  {
+    if (!re.match() || re.subpattern_length(0) == 0)
+    {
+      /* No match or an empty match: copy the tail of the subject */
+      if (str->append(src.str + pos, src.length - pos, re.library_charset()))
+        break;
+      return str;
+    }
+
+    /* Text before the match, then the expanded replacement */
+    if (str->append(src.str + pos, re.subpattern_start(0) - pos,
+                    re.library_charset()) ||
+        append_replacement(str, &src, &rpl))
+      break;
+  }
+
+  null_value= true;
+  return (String *) 0;
+}
+
+
+/*
+  Aggregate the result collation over both arguments and, when that
+  succeeds, take the result length from the subject's maximum character
+  length and set up the regex processor with ten sub-patterns requested.
+*/
+void Item_func_regexp_substr::fix_length_and_dec()
+{
+  const bool aggregation_failed=
+    agg_arg_charsets_for_string_result_with_comparison(collation, args, 2);
+  if (!aggregation_failed)
+  {
+    fix_char_length(args[0]->max_char_length());
+    re.init(collation.collation, 0, 10);
+    re.fix_owner(this, args[0], args[1]);
+  }
+}
+
+
+/*
+  Evaluate REGEXP_SUBSTR(args[0], args[1]).
+
+  Returns "str" holding the matched part of the subject, or an empty
+  "str" when the pattern does not match. Returns NULL with null_value set
+  when the subject is NULL, when the pattern cannot be recompiled, or
+  when a conversion or append step fails.
+*/
+String *Item_func_regexp_substr::val_str(String *str)
+{
+  DBUG_ASSERT(fixed == 1);
+  char subject_buf[MAX_FIELD_WIDTH];
+  String subject_tmp(subject_buf, sizeof(subject_buf), &my_charset_bin);
+  String *source= args[0]->val_str(&subject_tmp);
+
+  null_value= args[0]->null_value || re.recompile(args[1]);
+  if (null_value)
+    return (String *) 0;
+
+  source= re.convert_if_needed(source, &re.subject_converter);
+  if (source)
+  {
+    str->length(0);
+    str->set_charset(collation.collation);
+
+    if (!re.exec(source->ptr(), source->length(), 0))
+    {
+      if (!re.match())
+        return str;
+
+      /* Copy the whole-match span out of the converted subject */
+      const int match_beg= re.subpattern_start(0);
+      const int match_len= re.subpattern_end(0) - match_beg;
+      if (!str->append(source->ptr() + match_beg, match_len,
+                       re.library_charset()))
+        return str;
+    }
+  }
+
+  /* Conversion, execution or append failed */
+  null_value= true;
+  return (String *) 0;
+}
+
+
+/************************************************************************/
+
+
String *Item_func_insert::val_str(String *str)
{
DBUG_ASSERT(fixed == 1);
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index 93efc91d62c..e09dee18d4f 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -195,6 +195,49 @@ public:
};
+/*
+  String function item reported under the name "regexp_replace".
+  It takes three arguments and owns the Regexp_processor_pcre used to
+  evaluate them.
+*/
+class Item_func_regexp_replace :public Item_str_func
+{
+  Regexp_processor_pcre re;
+  /* Append the replacement, with sub-pattern references expanded, to str */
+  bool append_replacement(String *str,
+                          const LEX_CSTRING *source,
+                          const LEX_CSTRING *replace);
+public:
+  Item_func_regexp_replace(Item *a, Item *b, Item *c)
+    :Item_str_func(a, b, c)
+  {}
+  /* Run the base class cleanup, then release the regex processor state */
+  void cleanup()
+  {
+    /* The trace label names this class, not Item_func_regex */
+    DBUG_ENTER("Item_func_regexp_replace::cleanup");
+    Item_str_func::cleanup();
+    re.cleanup();
+    DBUG_VOID_RETURN;
+  }
+  String *val_str(String *str);
+  void fix_length_and_dec();
+  const char *func_name() const { return "regexp_replace"; }
+};
+
+
+/*
+  String function item reported under the name "regexp_substr".
+  It takes two arguments and owns the Regexp_processor_pcre used to
+  evaluate them.
+*/
+class Item_func_regexp_substr :public Item_str_func
+{
+  Regexp_processor_pcre re;
+public:
+  Item_func_regexp_substr(Item *a, Item *b)
+    :Item_str_func(a, b)
+  {}
+  /* Run the base class cleanup, then release the regex processor state */
+  void cleanup()
+  {
+    /* The trace label names this class, not Item_func_regex */
+    DBUG_ENTER("Item_func_regexp_substr::cleanup");
+    Item_str_func::cleanup();
+    re.cleanup();
+    DBUG_VOID_RETURN;
+  }
+  String *val_str(String *str);
+  void fix_length_and_dec();
+  const char *func_name() const { return "regexp_substr"; }
+};
+
+
class Item_func_insert :public Item_str_func
{
String tmp_value;
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index abdd5ca0ac4..eff0f850edf 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -1898,7 +1898,7 @@ void clean_up(bool print_message)
delete global_rpl_filter;
end_ssl();
vio_end();
- my_regex_end();
+ //my_regex_end();
#if defined(ENABLED_DEBUG_SYNC)
/* End the debug sync facility. See debug_sync.cc. */
debug_sync_end();
@@ -3904,10 +3904,10 @@ static int init_common_variables()
return 1;
item_init();
#ifndef EMBEDDED_LIBRARY
- my_regex_init(&my_charset_latin1, check_enough_stack_size);
+ //my_regex_init(&my_charset_latin1, check_enough_stack_size);
my_string_stack_guard= check_enough_stack_size;
#else
- my_regex_init(&my_charset_latin1, NULL);
+ //my_regex_init(&my_charset_latin1, NULL);
#endif
/*
Process a comma-separated character set list and choose
diff --git a/sql/sql_string.h b/sql/sql_string.h
index ab065f7bdfc..2bd591f1833 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -160,6 +160,11 @@ public:
LEX_STRING lex_string = { (char*) ptr(), length() };
return lex_string;
}
+  /* Return a LEX_CSTRING built from ptr() and length() of this string. */
+  LEX_CSTRING lex_cstring() const
+  {
+    LEX_CSTRING view;
+    view.str= ptr();
+    view.length= length();
+    return view;
+  }
void set(String &str,uint32 offset,uint32 arg_length)
{