Bug#31081 server crash in regexp function

Problem: The "regex" library written by Henry Spencer does not support tricky character sets like UCS2. Fix: convert tricky character sets to UTF8 before calling regex functions. mysql-test/r/ctype_uca.result: Adding tests mysql-test/r/ctype_ucs.result: Adding tests mysql-test/r/ctype_utf8.result: Adding tests mysql-test/r/func_regexp.result: Adding tests mysql-test/t/ctype_uca.test: Adding tests mysql-test/t/ctype_ucs.test: Adding tests mysql-test/t/ctype_utf8.test: Adding tests mysql-test/t/func_regexp.test: Adding tests sql/item_cmpfunc.cc: - Adding new method Item_func_regex::regcomp() to share more code between fix_fields() and val_int() - Adding conversion from ASCII-incompatible charsets like UCS2 to UTF8, because the "regexp" does not support these charsets - Additional optimization: calculate flags for regcomp only once in fix_fields, instead of every regcomp() sql/item_cmpfunc.h: Adding prototypes for new members and methods mysql-test/include/ctype_regex.inc: New BitKeeper file ``mysql-test/include/ctype_regex.inc'' Moving common regular expression tests into a separate file and uncluding it into func_regexp and into many ctype_xxx tests.
author: unknown <bar@mysql.com/bar.myoffice.izhnet.ru> 2007-10-05 12:15:11 +0500
committer: unknown <bar@mysql.com/bar.myoffice.izhnet.ru> 2007-10-05 12:15:11 +0500
commit: 64b19133820078a555e73ed1ff429984e82d1041 (patch)
tree: a8f229ad3a0b54c8e87866f471532633a263af0e /sql/item_cmpfunc.cc
parent: 4e331363cf9603cfd46c18d272985695b486911f (diff)
download: mariadb-git-64b19133820078a555e73ed1ff429984e82d1041.tar.gz
1 files changed, 72 insertions, 50 deletions
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 86eb10d50b0..51b3e8cda6b 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -4226,6 +4226,51 @@ void Item_func_like::cleanup()
 #ifdef USE_REGEX
 
 bool
+Item_func_regex::regcomp(bool send_error)
+{
+  char buff[MAX_FIELD_WIDTH];
+  String tmp(buff,sizeof(buff),&my_charset_bin);
+  String *res= args[1]->val_str(&tmp);
+  int error;
+
+  if (args[1]->null_value)
+    return TRUE;
+
+  if (regex_compiled)
+  {
+    if (!stringcmp(res, &prev_regexp))
+      return FALSE;
+    prev_regexp.copy(*res);
+    my_regfree(&preg);
+    regex_compiled= 0;
+  }
+
+  if (cmp_collation.collation != regex_lib_charset)
+  {
+    /* Convert UCS2 strings to UTF8 */
+    uint dummy_errors;
+    if (conv.copy(res->ptr(), res->length(), res->charset(),
+                  regex_lib_charset, &dummy_errors))
+      return TRUE;
+    res= &conv;
+  }
+
+  if ((error= my_regcomp(&preg, res->c_ptr(),
+                         regex_lib_flags, regex_lib_charset)))
+  {
+    if (send_error)
+    {
+      (void) my_regerror(error, &preg, buff, sizeof(buff));
+      my_error(ER_REGEXP_ERROR, MYF(0), buff);
+    }
+    return TRUE;
+  }
+  regex_compiled= 1;
+  return FALSE;
+}
+
+
+bool
 Item_func_regex::fix_fields(THD *thd, Item **ref)
 {
   DBUG_ASSERT(fixed == 0);
@@ -4241,34 +4286,33 @@ Item_func_regex::fix_fields(THD *thd, Item **ref)
   if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV, 1))
     return TRUE;
 
+  regex_lib_flags= (cmp_collation.collation->state &
+                    (MY_CS_BINSORT | MY_CS_CSSORT)) ?
+                   REG_EXTENDED | REG_NOSUB :
+                   REG_EXTENDED | REG_NOSUB | REG_ICASE;
+  /*
+    If the case of UCS2 and other non-ASCII character sets,
+    we will convert patterns and strings to UTF8.
+  */
+  regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ?
+                     &my_charset_utf8_general_ci :
+                     cmp_collation.collation;
+
   used_tables_cache=args[0]->used_tables() | args[1]->used_tables();
   not_null_tables_cache= (args[0]->not_null_tables() |
 			  args[1]->not_null_tables());
   const_item_cache=args[0]->const_item() && args[1]->const_item();
   if (!regex_compiled && args[1]->const_item())
   {
-    char buff[MAX_FIELD_WIDTH];
-    String tmp(buff,sizeof(buff),&my_charset_bin);
-    String *res=args[1]->val_str(&tmp);
     if (args[1]->null_value)
     {						// Will always return NULL
       maybe_null=1;
       return FALSE;
     }
-    int error;
-    if ((error= my_regcomp(&preg,res->c_ptr(),
-                           ((cmp_collation.collation->state &
-                             (MY_CS_BINSORT | MY_CS_CSSORT)) ?
-                            REG_EXTENDED | REG_NOSUB :
-                            REG_EXTENDED | REG_NOSUB | REG_ICASE),
-                           cmp_collation.collation)))
-    {
-      (void) my_regerror(error,&preg,buff,sizeof(buff));
-      my_error(ER_REGEXP_ERROR, MYF(0), buff);
+    if (regcomp(TRUE))
       return TRUE;
-    }
-    regex_compiled=regex_is_const=1;
-    maybe_null=args[0]->maybe_null;
+    regex_is_const= 1;
+    maybe_null= args[0]->maybe_null;
   }
   else
     maybe_null=1;
@@ -4281,47 +4325,25 @@ longlong Item_func_regex::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   char buff[MAX_FIELD_WIDTH];
-  String *res, tmp(buff,sizeof(buff),&my_charset_bin);
+  String tmp(buff,sizeof(buff),&my_charset_bin);
+  String *res= args[0]->val_str(&tmp);
 
-  res=args[0]->val_str(&tmp);
-  if (args[0]->null_value)
-  {
-    null_value=1;
+  if ((null_value= (args[0]->null_value ||
+                    (!regex_is_const && regcomp(FALSE)))))
     return 0;
-  }
-  if (!regex_is_const)
-  {
-    char buff2[MAX_FIELD_WIDTH];
-    String *res2, tmp2(buff2,sizeof(buff2),&my_charset_bin);
 
-    res2= args[1]->val_str(&tmp2);
-    if (args[1]->null_value)
+  if (cmp_collation.collation != regex_lib_charset)
+  {
+    /* Convert UCS2 strings to UTF8 */
+    uint dummy_errors;
+    if (conv.copy(res->ptr(), res->length(), res->charset(),
+                  regex_lib_charset, &dummy_errors))
     {
-      null_value=1;
+      null_value= 1;
       return 0;
     }
-    if (!regex_compiled || stringcmp(res2,&prev_regexp))
-    {
-      prev_regexp.copy(*res2);
-      if (regex_compiled)
-      {
-	my_regfree(&preg);
-	regex_compiled=0;
-      }
-      if (my_regcomp(&preg,res2->c_ptr_safe(),
-                     ((cmp_collation.collation->state &
-                       (MY_CS_BINSORT | MY_CS_CSSORT)) ?
-                      REG_EXTENDED | REG_NOSUB :
-                      REG_EXTENDED | REG_NOSUB | REG_ICASE),
-                     cmp_collation.collation))
-      {
-	null_value=1;
-	return 0;
-      }
-      regex_compiled=1;
-    }
+    res= &conv;
   }
-  null_value=0;
   return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1;
 }
author	unknown <bar@mysql.com/bar.myoffice.izhnet.ru>	2007-10-05 12:15:11 +0500
committer	unknown <bar@mysql.com/bar.myoffice.izhnet.ru>	2007-10-05 12:15:11 +0500
commit	64b19133820078a555e73ed1ff429984e82d1041 (patch)
tree	a8f229ad3a0b54c8e87866f471532633a263af0e /sql/item_cmpfunc.cc
parent	4e331363cf9603cfd46c18d272985695b486911f (diff)
download	mariadb-git-64b19133820078a555e73ed1ff429984e82d1041.tar.gz