MDEV-4425 REGEXP enhancements

author: Alexander Barkov <bar@mnogosearch.org> 2013-09-26 18:02:17 +0400
committer: Alexander Barkov <bar@mnogosearch.org> 2013-09-26 18:02:17 +0400
commit: 285e7aa179a6081531be3274772b89e8989fd107 (patch)
tree: 71836ea6e49f48fff6e957bcef38628e7b979001 /sql
parent: 9d83468e78ba23f024ce3c11443913ad75cf1ea5 (diff)
download: mariadb-git-285e7aa179a6081531be3274772b89e8989fd107.tar.gz
8 files changed, 553 insertions, 120 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 2dfdfa2154e..408b1ce8c68 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -16,7 +16,7 @@
 INCLUDE_DIRECTORIES(
 ${CMAKE_SOURCE_DIR}/include 
 ${CMAKE_SOURCE_DIR}/sql 
-${CMAKE_SOURCE_DIR}/regex 
+${CMAKE_SOURCE_DIR}/pcre 
 ${ZLIB_INCLUDE_DIR}
 ${SSL_INCLUDE_DIRS}
 ${CMAKE_BINARY_DIR}/sql
@@ -106,7 +106,7 @@ ADD_LIBRARY(sql STATIC ${SQL_SOURCE})
 ADD_DEPENDENCIES(sql GenServerSource)
 DTRACE_INSTRUMENT(sql)
 TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS} 
-  mysys dbug strings vio regex ${LIBJEMALLOC}
+  mysys dbug strings vio pcre ${LIBJEMALLOC}
   ${LIBWRAP} ${LIBCRYPT} ${LIBDL} ${CMAKE_THREAD_LIBS_INIT}
   ${SSL_LIBRARIES})
 
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 676e80ae79b..2f1592a5f3d 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -5034,156 +5034,193 @@ bool Item_func_like::find_selective_predicates_list_processor(uchar *arg)
 }
 
 
+
+/**
+  Bring a string into the charset the regex library works with.
+
+  @param str        the string to convert
+  @param converter  buffer that receives the converted copy
+
+  @return @a str itself when no conversion is needed, @a converter when
+          a converted copy was made, NULL when the copy failed.
+*/
+String *Regexp_processor_pcre::convert_if_needed(String *str, String *converter)
+{
+  if (!m_conversion_is_needed)
+    return str;
+
+  uint dummy_errors;
+  const bool copy_failed= converter->copy(str->ptr(), str->length(),
+                                          str->charset(), m_library_charset,
+                                          &dummy_errors);
+  return copy_failed ? NULL : converter;
+}
+
+
 /**
   @brief Compile regular expression.
 
+  @param[in]    pattern        the pattern to compile from.
   @param[in]    send_error     send error message if any.
 
   @details Make necessary character set conversion then 
   compile regular expression passed in the args[1].
+  If a regex is already compiled and @a pattern is byte-identical to the
+  pattern remembered from the previous call, the compiled regex is reused.
+  The pattern is remembered on every compilation attempt, including the
+  very first one, so the comparison never runs against an empty
+  m_prev_pattern (which would make an empty pattern reuse a stale regex).
 
-  @retval    0     success.
-  @retval    1     error occurred.
-  @retval   -1     given null regular expression.
+  @retval    false  success.
+  @retval    true   error occurred.
  */
 
-int Item_func_regex::regcomp(bool send_error)
+bool Regexp_processor_pcre::compile(String *pattern, bool send_error)
 {
-  char buff[MAX_FIELD_WIDTH];
-  String tmp(buff,sizeof(buff),&my_charset_bin);
-  String *res= args[1]->val_str(&tmp);
-  int error;
+  const char *pcreErrorStr;
+  int pcreErrorOffset;
 
-  if (args[1]->null_value)
-    return -1;
-
-  if (regex_compiled)
+  if (is_compiled())
   {
-    if (!stringcmp(res, &prev_regexp))
-      return 0;
-    prev_regexp.copy(*res);
-    my_regfree(&preg);
-    regex_compiled= 0;
+    if (!stringcmp(pattern, &m_prev_pattern))
+      return false;                      // same pattern: keep the regex
+    pcre_free(m_pcre);
+    m_pcre= NULL;
   }
+  /* Remember the unconverted pattern for the comparison in the next call. */
+  m_prev_pattern.copy(*pattern);
 
-  if (cmp_collation.collation != regex_lib_charset)
-  {
-    /* Convert UCS2 strings to UTF8 */
-    uint dummy_errors;
-    if (conv.copy(res->ptr(), res->length(), res->charset(),
-                  regex_lib_charset, &dummy_errors))
-      return 1;
-    res= &conv;
-  }
+  if (!(pattern= convert_if_needed(pattern, &pattern_converter)))
+    return true;
+
+  m_pcre= pcre_compile(pattern->c_ptr_safe(), m_library_flags,
+                       &pcreErrorStr, &pcreErrorOffset, NULL);
 
-  if ((error= my_regcomp(&preg, res->c_ptr_safe(),
-                         regex_lib_flags, regex_lib_charset)))
+  if (m_pcre == NULL)
   {
     if (send_error)
     {
-      (void) my_regerror(error, &preg, buff, sizeof(buff));
+      char buff[MAX_FIELD_WIDTH];
+      my_snprintf(buff, sizeof(buff), "%s at offset %d", pcreErrorStr, pcreErrorOffset);
       my_error(ER_REGEXP_ERROR, MYF(0), buff);
     }
-    return 1;
+    return true;
   }
-  regex_compiled= 1;
-  return 0;
+  return false;
 }
 
 
-bool
-Item_func_regex::fix_fields(THD *thd, Item **ref)
+/**
+  Evaluate @a item as a string and compile the result as the pattern.
+
+  @retval false  the pattern was compiled (or the compiled one was reused)
+  @retval true   the pattern value is NULL, or compilation failed
+*/
+bool Regexp_processor_pcre::compile(Item *item, bool send_error)
 {
-  DBUG_ASSERT(fixed == 0);
-  if ((!args[0]->fixed &&
-       args[0]->fix_fields(thd, args)) || args[0]->check_cols(1) ||
-      (!args[1]->fixed &&
-       args[1]->fix_fields(thd, args + 1)) || args[1]->check_cols(1))
-    return TRUE;				/* purecov: inspected */
-  with_sum_func=args[0]->with_sum_func || args[1]->with_sum_func;
-  with_field= args[0]->with_field || args[1]->with_field;
-  with_subselect= args[0]->has_subquery() || args[1]->has_subquery();
-  max_length= 1;
-  decimals= 0;
+  char value_buffer[MAX_FIELD_WIDTH];
+  String value_holder(value_buffer, sizeof(value_buffer), &my_charset_bin);
+  String *pattern_value= item->val_str(&value_holder);
+  /* null_value is checked first, so a NULL pattern is never compiled. */
+  return item->null_value || compile(pattern_value, send_error);
+}
 
-  if (agg_arg_charsets_for_comparison(cmp_collation, args, 2))
-    return TRUE;
 
-  regex_lib_flags= (cmp_collation.collation->state &
-                    (MY_CS_BINSORT | MY_CS_CSSORT)) ?
-                   REG_EXTENDED | REG_NOSUB :
-                   REG_EXTENDED | REG_NOSUB | REG_ICASE;
-  /*
-    If the case of UCS2 and other non-ASCII character sets,
-    we will convert patterns and strings to UTF8.
-  */
-  regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ?
-                     &my_charset_utf8_general_ci :
-                     cmp_collation.collation;
+/**
+  Run the compiled regex over a raw buffer; no charset conversion is done.
+  The pcre_exec() return code is stored in m_pcre_exec_rc and the offsets
+  in m_SubStrVec. The function itself always returns false.
+*/
+bool Regexp_processor_pcre::exec(const char *str, int length, int offset)
+{
+  const int ovector_size= m_subpatterns_needed * 3;
+  m_pcre_exec_rc= pcre_exec(m_pcre, NULL, str, length, offset, 0,
+                            m_SubStrVec, ovector_size);
+  return false;
+}
 
-  used_tables_cache=args[0]->used_tables() | args[1]->used_tables();
-  not_null_tables_cache= (args[0]->not_null_tables() |
-			  args[1]->not_null_tables());
-  const_item_cache=args[0]->const_item() && args[1]->const_item();
-  if (!regex_compiled && args[1]->const_item())
+
+/**
+  Convert @a str to the library charset if needed and run the compiled regex.
+
+  @param str     the subject string
+  @param offset  start offset passed to pcre_exec()
+  @param n_result_offsets_to_convert  number of leading elements of the
+                 offset vector to turn from byte into character offsets
+
+  @retval false  pcre_exec() was run; its return code is in m_pcre_exec_rc
+  @retval true   charset conversion of the subject failed
+*/
+bool Regexp_processor_pcre::exec(String *str, int offset,
+                                  uint n_result_offsets_to_convert)
+{
+  if (!(str= convert_if_needed(str, &subject_converter)))
+    return true;
+  m_pcre_exec_rc= pcre_exec(m_pcre, NULL, str->c_ptr_safe(), str->length(),
+                            offset, 0, m_SubStrVec, m_subpatterns_needed * 3);
+  /*
+    A return code of 0 is also a match: pcre_exec() reports that the
+    offset vector was too small for all captured substrings, but it has
+    still filled the vector as far as possible. So offsets are converted
+    for any non-negative return code, limited to the slots requested.
+  */
+  if (m_pcre_exec_rc >= 0)
   {
-    int comp_res= regcomp(TRUE);
-    if (comp_res == -1)
-    {						// Will always return NULL
-      maybe_null=1;
-      fixed= 1;
-      return FALSE;
+    uint i;
+    const uint n_filled= m_subpatterns_needed * 2;
+    for (i= 0; i < n_result_offsets_to_convert && i < n_filled; i++)
+    {
+      /*
+        Convert byte offset into character offset.
+      */
+      m_SubStrVec[i]= (int) str->charset()->cset->numchars(str->charset(),
+                                                           str->ptr(),
+                                                           str->ptr() +
+                                                           m_SubStrVec[i]);
     }
-    else if (comp_res)
-      return TRUE;
-    regex_is_const= 1;
-    maybe_null= args[0]->maybe_null;
   }
-  else
-    maybe_null=1;
-  fixed= 1;
-  return FALSE;
+  return false;
 }
 
 
-longlong Item_func_regex::val_int()
+/**
+  Evaluate @a item as a string and run the compiled regex over it.
+
+  @retval true   the subject value is NULL, or the String overload failed
+  @retval false  otherwise
+*/
+bool Regexp_processor_pcre::exec(Item *item, int offset,
+                                uint n_result_offsets_to_convert)
 {
-  DBUG_ASSERT(fixed == 1);
   char buff[MAX_FIELD_WIDTH];
   String tmp(buff,sizeof(buff),&my_charset_bin);
-  String *res= args[0]->val_str(&tmp);
+  String *subject= item->val_str(&tmp);
+  /* null_value is checked first, so a NULL subject is never matched. */
+  return item->null_value ||
+         exec(subject, offset, n_result_offsets_to_convert);
+}
 
-  if ((null_value= (args[0]->null_value ||
-                    (!regex_is_const && regcomp(FALSE)))))
-    return 0;
 
-  if (cmp_collation.collation != regex_lib_charset)
+void Regexp_processor_pcre::fix_owner(Item_func *owner, // item whose maybe_null is set here
+                                      Item *subject_arg,
+                                      Item *pattern_arg)
+{
+  if (!is_compiled() && pattern_arg->const_item()) // constant pattern: compile it once, now
   {
-    /* Convert UCS2 strings to UTF8 */
-    uint dummy_errors;
-    if (conv.copy(res->ptr(), res->length(), res->charset(),
-                  regex_lib_charset, &dummy_errors))
+    if (compile(pattern_arg, true)) // true: a compile error is sent to the user
     {
-      null_value= 1;
-      return 0;
+      owner->maybe_null= 1; // Will always return NULL
+      return;
     }
-    res= &conv;
+    set_const(true); // recompile() becomes a no-op from now on
+    owner->maybe_null= subject_arg->maybe_null; // only a NULL subject can give NULL
   }
-  return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1;
+  else
+    owner->maybe_null= 1; // pattern is compiled at execution time and may fail then
 }
 
 
-void Item_func_regex::cleanup()
+void
+Item_func_regex::fix_length_and_dec()
 {
-  DBUG_ENTER("Item_func_regex::cleanup");
-  Item_bool_func::cleanup();
-  if (regex_compiled)
-  {
-    my_regfree(&preg);
-    regex_compiled=0;
-    prev_regexp.length(0);
-  }
-  DBUG_VOID_RETURN;
+  Item_bool_func::fix_length_and_dec();
+
+  if (agg_arg_charsets_for_comparison(cmp_collation, args, 2)) // aggregate both arguments into cmp_collation
+    return; // non-zero result: stop without initializing the regex processor
+
+  re.init(cmp_collation.collation, 0, 0); // no extra flags, no offset pairs: only match/no match is used
+  re.fix_owner(this, args[0], args[1]); // args[0] is the subject, args[1] the pattern
+}
+
+
+/**
+  Evaluate "subject REGEXP pattern".
+
+  @return the result of re.match(), or 0 with null_value set when
+          recompiling the pattern or executing the regex fails.
+*/
+longlong Item_func_regex::val_int()
+{
+  DBUG_ASSERT(fixed == 1);
+  /* exec() is only reached when recompile() succeeded. */
+  null_value= re.recompile(args[1]) || re.exec(args[0], 0, 0);
+  return null_value ? 0 : re.match();
+}
+
+
+void
+Item_func_regexp_instr::fix_length_and_dec()
+{
+  if (agg_arg_charsets_for_comparison(cmp_collation, args, 2)) // aggregate both arguments into cmp_collation
+    return; // non-zero result: stop without initializing the regex processor
+
+  re.init(cmp_collation.collation, 0, 1); // no extra flags; one offset pair, for the whole match
+  re.fix_owner(this, args[0], args[1]); // args[0] is the subject, args[1] the pattern
+}
+
+
+/**
+  Evaluate REGEXP_INSTR(subject, pattern).
+
+  @return re.subpattern_start(0) + 1 when the pattern matches, 0 when it
+          does not, and 0 with null_value set when recompiling the
+          pattern or executing the regex fails.
+*/
+longlong Item_func_regexp_instr::val_int()
+{
+  DBUG_ASSERT(fixed == 1);
+  /* exec() is only reached when recompile() succeeded. */
+  null_value= re.recompile(args[1]) || re.exec(args[0], 0, 1);
+  if (null_value || !re.match())
+    return 0;
+  return re.subpattern_start(0) + 1;
 }
 
 
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index 19c43f9e55b..fbe752551df 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -25,7 +25,7 @@
 
 #include "thr_malloc.h"                         /* sql_calloc */
 #include "item_func.h"             /* Item_int_func, Item_bool_func */
-#include "my_regex.h"
+#include <pcre.h>                 /* pcre header file */
 
 extern Item_result item_cmp_type(Item_result a,Item_result b);
 class Item_bool_func2;
@@ -1484,23 +1484,101 @@ public:
 };
 
 
+/**
+  Wrapper around a compiled PCRE pattern. It keeps the compiled regex,
+  the charset conversion settings and the outcome of the most recent
+  pcre_exec() call (return code and offset vector).
+*/
+class Regexp_processor_pcre
+{
+  pcre *m_pcre;                    // compiled pattern, NULL when there is none
+  bool m_conversion_is_needed;     // convert data to m_library_charset first
+  bool m_is_const;                 // when set, recompile() does nothing
+  int m_library_flags;             // options passed to pcre_compile()
+  CHARSET_INFO *m_data_charset;    // charset given to init()
+  CHARSET_INFO *m_library_charset; // charset the data is converted to
+  String m_prev_pattern;           // compared in compile() to skip recompiling
+  int m_pcre_exec_rc;              // return code of the last pcre_exec()
+  int m_SubStrVec[30];             // offset vector filled by pcre_exec()
+  uint m_subpatterns_needed;       // number of offset pairs requested in init()
+public:
+  String *convert_if_needed(String *src, String *converter);
+  String subject_converter;
+  String pattern_converter;
+  String replace_converter;
+  Regexp_processor_pcre() :
+    m_pcre(NULL), m_conversion_is_needed(true), m_is_const(0),
+    m_library_flags(0),
+    m_data_charset(&my_charset_utf8_general_ci),
+    m_library_charset(&my_charset_utf8_general_ci),
+    m_pcre_exec_rc(PCRE_ERROR_NOMATCH), // "no match" until exec() has run
+    m_subpatterns_needed(0)
+  {}
+  /**
+    Configure flags and charsets.
+
+    @param data_charset  charset/collation of the data to be matched
+    @param extra_flags   additional pcre_compile() options
+    @param nsubpatterns  number of offset pairs the caller wants back;
+                         each one takes three ints of m_SubStrVec
+  */
+  void init(CHARSET_INFO *data_charset, int extra_flags, uint nsubpatterns)
+  {
+    DBUG_ASSERT(nsubpatterns * 3 <=
+                sizeof(m_SubStrVec) / sizeof(m_SubStrVec[0]));
+    m_data_charset= data_charset;
+    m_library_flags= PCRE_UCP | extra_flags |
+                    (data_charset != &my_charset_bin ? PCRE_UTF8 : 0) |
+                    ((data_charset->state &
+                     (MY_CS_BINSORT | MY_CS_CSSORT)) ? 0 : PCRE_CASELESS);
+
+    // Convert text data to utf-8.
+    m_library_charset= data_charset == &my_charset_bin ?
+                       &my_charset_bin : &my_charset_utf8_general_ci;
+
+    m_conversion_is_needed= (data_charset != &my_charset_bin) &&
+                            !my_charset_same(data_charset, m_library_charset);
+    m_subpatterns_needed= nsubpatterns;
+  }
+  void fix_owner(Item_func *owner, Item *subject_arg, Item *pattern_arg);
+  bool compile(String *pattern, bool send_error);
+  bool compile(Item *item, bool send_error);
+  /** Compile the pattern again unless it was marked constant. */
+  bool recompile(Item *item)
+  {
+    return !m_is_const && compile(item, false);
+  }
+  bool exec(const char *str, int length, int offset);
+  bool exec(String *str, int offset, uint n_result_offsets_to_convert);
+  bool exec(Item *item, int offset, uint n_result_offsets_to_convert);
+  /** True when the last pcre_exec() call returned a non-negative code. */
+  bool match() const { return m_pcre_exec_rc < 0 ? 0 : 1; }
+  /**
+    Number of offset pairs available after the last exec().
+    A return code of 0 means the offset vector was too small for all
+    captured substrings; pcre_exec() has then filled as many pairs as
+    the vector can hold, which is m_subpatterns_needed.
+  */
+  int nsubpatterns() const
+  {
+    if (m_pcre_exec_rc < 0)
+      return 0;
+    return m_pcre_exec_rc ? m_pcre_exec_rc : (int) m_subpatterns_needed;
+  }
+  /** Start offset of pair @a n, or 0 when that pair is not available. */
+  int subpattern_start(int n) const
+  {
+    return n >= 0 && n < nsubpatterns() ? m_SubStrVec[n * 2] : 0;
+  }
+  /** End offset of pair @a n, or 0 when that pair is not available. */
+  int subpattern_end(int n) const
+  {
+    return n >= 0 && n < nsubpatterns() ? m_SubStrVec[n * 2 + 1] : 0;
+  }
+  int subpattern_length(int n) const
+  {
+    return subpattern_end(n) - subpattern_start(n);
+  }
+  /** Free the compiled regex and forget the remembered pattern. */
+  void cleanup()
+  {
+    if (m_pcre)
+    {
+      pcre_free(m_pcre);
+      m_pcre= NULL;
+    }
+    m_prev_pattern.length(0);
+  }
+  bool is_compiled() const { return m_pcre != NULL; }
+  bool is_const() const { return m_is_const; }
+  void set_const(bool arg) { m_is_const= arg; }
+  CHARSET_INFO * library_charset() const { return m_library_charset; }
+};
+
+
 class Item_func_regex :public Item_bool_func
 {
-  my_regex_t preg;
-  bool regex_compiled;
-  bool regex_is_const;
-  String prev_regexp;
+  Regexp_processor_pcre re;
   DTCollation cmp_collation;
-  CHARSET_INFO *regex_lib_charset;
-  int regex_lib_flags;
-  String conv;
-  int regcomp(bool send_error);
 public:
-  Item_func_regex(Item *a,Item *b) :Item_bool_func(a,b),
-    regex_compiled(0),regex_is_const(0) {}
-  void cleanup();
+  Item_func_regex(Item *a,Item *b) :Item_bool_func(a,b)
+  {}
+  void cleanup()
+  {
+    DBUG_ENTER("Item_func_regex::cleanup");
+    Item_bool_func::cleanup();
+    re.cleanup();
+    DBUG_VOID_RETURN;
+  }
   longlong val_int();
-  bool fix_fields(THD *thd, Item **ref);
+  void fix_length_and_dec();
   const char *func_name() const { return "regexp"; }
 
   virtual inline void print(String *str, enum_query_type query_type)
@@ -1512,6 +1590,26 @@ public:
 };
 
 
+class Item_func_regexp_instr :public Item_int_func // integer item named "regexp_instr"
+{
+  Regexp_processor_pcre re; // compiled pattern and last match result
+  DTCollation cmp_collation; // filled by agg_arg_charsets_for_comparison() in fix_length_and_dec()
+public:
+  Item_func_regexp_instr(Item *a, Item *b) :Item_int_func(a, b) // a: subject, b: pattern
+  {}
+  void cleanup()
+  {
+    DBUG_ENTER("Item_func_regexp_instr::cleanup");
+    Item_int_func::cleanup();
+    re.cleanup(); // frees the compiled regex
+    DBUG_VOID_RETURN;
+  }
+  longlong val_int();
+  void fix_length_and_dec();
+  const char *func_name() const { return "regexp_instr"; }
+};
+
+
 typedef class Item COND;
 
 class Item_cond :public Item_bool_func
diff --git a/sql/item_create.cc b/sql/item_create.cc
index ce4dc7ced8f..60eabe67c83 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -2014,6 +2014,45 @@ protected:
 };
 
 
+class Create_func_regexp_instr : public Create_func_arg2 // two-argument builder for Item_func_regexp_instr
+{
+public:
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); // allocates the item on thd->mem_root
+
+  static Create_func_regexp_instr s_singleton; // the instance defined alongside create_2_arg()
+
+protected:
+  Create_func_regexp_instr() {} // protected: not constructible by outside code
+  virtual ~Create_func_regexp_instr() {}
+};
+
+
+class Create_func_regexp_replace : public Create_func_arg3 // three-argument builder for Item_func_regexp_replace
+{
+public:
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3); // allocates the item on thd->mem_root
+
+  static Create_func_regexp_replace s_singleton; // the instance defined alongside create_3_arg()
+
+protected:
+  Create_func_regexp_replace() {} // protected: not constructible by outside code
+  virtual ~Create_func_regexp_replace() {}
+};
+
+
+class Create_func_regexp_substr : public Create_func_arg2 // two-argument builder for Item_func_regexp_substr
+{
+public:
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); // allocates the item on thd->mem_root
+
+  static Create_func_regexp_substr s_singleton; // the instance defined alongside create_2_arg()
+
+protected:
+  Create_func_regexp_substr() {} // protected: not constructible by outside code
+  virtual ~Create_func_regexp_substr() {}
+};
+
+
 class Create_func_radians : public Create_func_arg1
 {
 public:
@@ -4719,6 +4758,33 @@ Create_func_quote::create_1_arg(THD *thd, Item *arg1)
 }
 
 
+Create_func_regexp_instr Create_func_regexp_instr::s_singleton; // definition of the static builder instance
+
+Item*
+Create_func_regexp_instr::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_regexp_instr(arg1, arg2); // arg1: subject, arg2: pattern
+}
+
+
+Create_func_regexp_replace Create_func_regexp_replace::s_singleton; // definition of the static builder instance
+
+Item*
+Create_func_regexp_replace::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+{
+  return new (thd->mem_root) Item_func_regexp_replace(arg1, arg2, arg3); // subject, pattern, replacement
+}
+
+
+Create_func_regexp_substr Create_func_regexp_substr::s_singleton; // definition of the static builder instance
+
+Item*
+Create_func_regexp_substr::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_regexp_substr(arg1, arg2); // arg1: subject, arg2: pattern
+}
+
+
 Create_func_radians Create_func_radians::s_singleton;
 
 Item*
@@ -5514,6 +5580,9 @@ static Native_func_registry func_array[] =
   { { C_STRING_WITH_LEN("POW") }, BUILDER(Create_func_pow)},
   { { C_STRING_WITH_LEN("POWER") }, BUILDER(Create_func_pow)},
   { { C_STRING_WITH_LEN("QUOTE") }, BUILDER(Create_func_quote)},
+  { { C_STRING_WITH_LEN("REGEXP_INSTR") }, BUILDER(Create_func_regexp_instr)},
+  { { C_STRING_WITH_LEN("REGEXP_REPLACE") }, BUILDER(Create_func_regexp_replace)},
+  { { C_STRING_WITH_LEN("REGEXP_SUBSTR") }, BUILDER(Create_func_regexp_substr)},
   { { C_STRING_WITH_LEN("RADIANS") }, BUILDER(Create_func_radians)},
   { { C_STRING_WITH_LEN("RAND") }, BUILDER(Create_func_rand)},
   { { C_STRING_WITH_LEN("RELEASE_LOCK") }, BUILDER(Create_func_release_lock)},
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index fe67c1e81d1..02d64952459 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -1260,6 +1260,187 @@ void Item_func_replace::fix_length_and_dec()
 }
 
 
+/*********************************************************************/
+void Item_func_regexp_replace::fix_length_and_dec()
+{
+  if (agg_arg_charsets_for_string_result_with_comparison(collation, args, 3)) // aggregate all three arguments into collation
+    return; // non-zero result: stop without initializing the regex processor
+  max_length= MAX_BLOB_WIDTH;
+  re.init(collation.collation, 0, 10); // no extra flags; ten offset pairs, matching references \0 .. \9
+  re.fix_owner(this, args[0], args[1]); // args[0] is the subject, args[1] the pattern
+}
+
+
+/*
+  Traverse through the replacement string and append it to "str".
+  Sub-pattern references \0 .. \9 are recognized and replaced with the
+  corresponding chunks of the source string. A backslash followed by
+  any other character appends that character itself.
+
+  @param str      destination string
+  @param source   the subject; sub-pattern offsets index into it
+  @param replace  the replacement template
+
+  Both "source" and "replace" are read using re.library_charset().
+
+  @return false on success, true if appending to "str" failed.
+*/
+bool Item_func_regexp_replace::append_replacement(String *str,
+                                                  const LEX_CSTRING *source,
+                                                  const LEX_CSTRING *replace)
+{
+  const char *beg= replace->str;
+  const char *end= beg + replace->length;
+  CHARSET_INFO *cs= re.library_charset();
+
+  for ( ; ; )
+  {
+    my_wc_t wc;
+    int cnv, n;
+
+    if ((cnv= cs->cset->mb_wc(cs, &wc, (const uchar *) beg,
+                                       (const uchar *) end)) < 1)
+      break; /* End of line */
+    beg+= cnv;
+
+    if (wc != '\\')
+    {
+      /* An ordinary character: copy it as is. */
+      if (str->append(beg - cnv, cnv, cs))
+        return true;
+      continue;
+    }
+
+    /* A backslash: look at the character that follows it. */
+    if ((cnv= cs->cset->mb_wc(cs, &wc, (const uchar *) beg,
+                                       (const uchar *) end)) < 1)
+      break; /* End of line */
+    beg+= cnv;
+
+    if ((n= ((int) wc) - '0') >= 0 && n <= 9 && n < re.nsubpatterns())
+    {
+      /* A valid sub-pattern reference found */
+      int pbeg= re.subpattern_start(n), plength= re.subpattern_end(n) - pbeg;
+      if (str->append(source->str + pbeg, plength, cs))
+        return true;
+    }
+    else
+    {
+      /*
+         A non-digit character following after '\'.
+         Just add the character itself.
+         An append failure is an error here as in the other branches.
+       */
+      if (str->append(beg - cnv, cnv, cs))
+        return true;
+    }
+  }
+  return false;
+}
+
+
+String *Item_func_regexp_replace::val_str(String *str) // returns str with the result, or NULL with null_value set
+{
+  DBUG_ASSERT(fixed == 1);
+  char buff0[MAX_FIELD_WIDTH];
+  char buff2[MAX_FIELD_WIDTH];
+  String tmp0(buff0,sizeof(buff0),&my_charset_bin);
+  String tmp2(buff2,sizeof(buff2),&my_charset_bin);
+  String *source= args[0]->val_str(&tmp0); // the subject
+  String *replace= args[2]->val_str(&tmp2); // the replacement template
+  LEX_CSTRING src, rpl;
+  int startoffset= 0; // offset in src where the next search starts
+
+  if ((null_value= (args[0]->null_value || args[2]->null_value ||
+                    re.recompile(args[1])))) // NULL argument or pattern that failed to compile
+    return (String *) 0;
+
+  if (!(source= re.convert_if_needed(source, &re.subject_converter)) ||
+      !(replace= re.convert_if_needed(replace, &re.replace_converter)))
+    goto err;
+
+  src= source->lex_cstring();
+  rpl= replace->lex_cstring();
+
+  str->length(0);
+  str->set_charset(collation.collation); // the chunks appended below are passed with re.library_charset()
+
+  for ( ; ; ) // Iterate through all matches
+  {
+
+    if (re.exec(src.str, src.length, startoffset))
+      goto err;
+
+    if (!re.match() || re.subpattern_length(0) == 0)
+    {
+      /* 
+        No match or an empty match.
+        Append the rest of the source string
+        starting from startoffset until the end of the source.
+        NOTE(review): an empty match ends the loop here, so any later
+        non-empty match is left unreplaced; confirm this is intended.
+      */
+      if (str->append(src.str + startoffset, src.length - startoffset, re.library_charset()))
+        goto err;
+      return str;
+    }
+
+    /*
+      Append prefix, the part before the matching pattern.
+      starting from startoffset until the next match
+    */
+    if (str->append(src.str + startoffset, re.subpattern_start(0) - startoffset, re.library_charset()))
+      goto err;
+
+    // Append replacement
+    if (append_replacement(str, &src, &rpl))
+      goto err;
+
+    // Set the new start point as the end of previous match
+    startoffset= re.subpattern_end(0);
+  }
+  return str; // not reached: the loop above is only left via return or goto
+
+err:
+  null_value= true;
+  return (String *) 0;
+}
+
+
+void Item_func_regexp_substr::fix_length_and_dec()
+{
+  if (agg_arg_charsets_for_string_result_with_comparison(collation, args, 2)) // aggregate both arguments into collation
+    return; // non-zero result: stop without initializing the regex processor
+  fix_char_length(args[0]->max_char_length()); // length limit is taken from the subject argument
+  re.init(collation.collation, 0, 10); // no extra flags; ten offset pairs requested
+  re.fix_owner(this, args[0], args[1]); // args[0] is the subject, args[1] the pattern
+}
+
+
+String *Item_func_regexp_substr::val_str(String *str) // returns str with the result, or NULL with null_value set
+{
+  DBUG_ASSERT(fixed == 1);
+  char buff0[MAX_FIELD_WIDTH];
+  String tmp0(buff0,sizeof(buff0),&my_charset_bin);
+  String *source= args[0]->val_str(&tmp0); // the subject
+
+  if ((null_value= (args[0]->null_value || re.recompile(args[1])))) // NULL subject or pattern that failed to compile
+    return (String *) 0;
+
+  if (!(source= re.convert_if_needed(source, &re.subject_converter)))
+    goto err;
+
+  str->length(0);
+  str->set_charset(collation.collation); // the chunk appended below is passed with re.library_charset()
+
+  if (re.exec(source->ptr(), source->length(), 0))
+    goto err;
+
+  if (!re.match())
+    return str; // no match: the result is an empty string, not NULL
+
+  if (str->append(source->ptr() + re.subpattern_start(0), // pair 0 is the whole match
+                  re.subpattern_end(0) - re.subpattern_start(0),
+                  re.library_charset()))
+    goto err;
+
+  return str;
+
+err:
+  null_value= true;
+  return (String *) 0;
+}
+
+
+/************************************************************************/
+
+
 String *Item_func_insert::val_str(String *str)
 {
   DBUG_ASSERT(fixed == 1);
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index 93efc91d62c..e09dee18d4f 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -195,6 +195,49 @@ public:
 };
 
 
+/**
+  String item named "regexp_replace". It takes three arguments:
+  subject, pattern and replacement.
+*/
+class Item_func_regexp_replace :public Item_str_func
+{
+  Regexp_processor_pcre re;   // compiled pattern and last match result
+  bool append_replacement(String *str,
+                          const LEX_CSTRING *source,
+                          const LEX_CSTRING *replace);
+public:
+  Item_func_regexp_replace(Item *a, Item *b, Item *c)
+    :Item_str_func(a, b, c)
+    {}
+  void cleanup()
+  {
+    /* The trace tag names this class, not Item_func_regex. */
+    DBUG_ENTER("Item_func_regexp_replace::cleanup");
+    Item_str_func::cleanup();
+    re.cleanup();               // frees the compiled regex
+    DBUG_VOID_RETURN;
+  }
+  String *val_str(String *str);
+  void fix_length_and_dec();
+  const char *func_name() const { return "regexp_replace"; }
+};
+
+
+/**
+  String item named "regexp_substr". It takes two arguments:
+  subject and pattern.
+*/
+class Item_func_regexp_substr :public Item_str_func
+{
+  Regexp_processor_pcre re;   // compiled pattern and last match result
+public:
+  Item_func_regexp_substr(Item *a, Item *b)
+    :Item_str_func(a, b)
+    {}
+  void cleanup()
+  {
+    /* The trace tag names this class, not Item_func_regex. */
+    DBUG_ENTER("Item_func_regexp_substr::cleanup");
+    Item_str_func::cleanup();
+    re.cleanup();               // frees the compiled regex
+    DBUG_VOID_RETURN;
+  }
+  String *val_str(String *str);
+  void fix_length_and_dec();
+  const char *func_name() const { return "regexp_substr"; }
+};
+
+
 class Item_func_insert :public Item_str_func
 {
   String tmp_value;
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index abdd5ca0ac4..eff0f850edf 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -1898,7 +1898,7 @@ void clean_up(bool print_message)
   delete global_rpl_filter;
   end_ssl();
   vio_end();
-  my_regex_end();
+  //my_regex_end();
 #if defined(ENABLED_DEBUG_SYNC)
   /* End the debug sync facility. See debug_sync.cc. */
   debug_sync_end();
@@ -3904,10 +3904,10 @@ static int init_common_variables()
     return 1;
   item_init();
 #ifndef EMBEDDED_LIBRARY
-  my_regex_init(&my_charset_latin1, check_enough_stack_size);
+  //my_regex_init(&my_charset_latin1, check_enough_stack_size);
   my_string_stack_guard= check_enough_stack_size;
 #else
-  my_regex_init(&my_charset_latin1, NULL);
+  //my_regex_init(&my_charset_latin1, NULL);
 #endif
   /*
     Process a comma-separated character set list and choose
diff --git a/sql/sql_string.h b/sql/sql_string.h
index ab065f7bdfc..2bd591f1833 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -160,6 +160,11 @@ public:
     LEX_STRING lex_string = { (char*) ptr(), length() };
     return lex_string;
   }
+  /* Return ptr() and length() of this string packed into a LEX_CSTRING. */
+  LEX_CSTRING lex_cstring() const
+  {
+    LEX_CSTRING view;
+    view.str= ptr();
+    view.length= length();
+    return view;
+  }
 
   void set(String &str,uint32 offset,uint32 arg_length)
   {
author	Alexander Barkov <bar@mnogosearch.org>	2013-09-26 18:02:17 +0400
committer	Alexander Barkov <bar@mnogosearch.org>	2013-09-26 18:02:17 +0400
commit	285e7aa179a6081531be3274772b89e8989fd107 (patch)
tree	71836ea6e49f48fff6e957bcef38628e7b979001 /sql
parent	9d83468e78ba23f024ce3c11443913ad75cf1ea5 (diff)
download	mariadb-git-285e7aa179a6081531be3274772b89e8989fd107.tar.gz