diff options
author | unknown <bar@mysql.com/bar.myoffice.izhnet.ru> | 2007-10-05 12:15:11 +0500 |
---|---|---|
committer | unknown <bar@mysql.com/bar.myoffice.izhnet.ru> | 2007-10-05 12:15:11 +0500 |
commit | 64b19133820078a555e73ed1ff429984e82d1041 (patch) | |
tree | a8f229ad3a0b54c8e87866f471532633a263af0e /sql/item_cmpfunc.cc | |
parent | 4e331363cf9603cfd46c18d272985695b486911f (diff) | |
download | mariadb-git-64b19133820078a555e73ed1ff429984e82d1041.tar.gz |
Bug#31081 server crash in regexp function
Problem: The "regex" library written by Henry Spencer
does not support tricky character sets like UCS2.
Fix: convert tricky character sets to UTF8 before calling
regex functions.
mysql-test/r/ctype_uca.result:
Adding tests
mysql-test/r/ctype_ucs.result:
Adding tests
mysql-test/r/ctype_utf8.result:
Adding tests
mysql-test/r/func_regexp.result:
Adding tests
mysql-test/t/ctype_uca.test:
Adding tests
mysql-test/t/ctype_ucs.test:
Adding tests
mysql-test/t/ctype_utf8.test:
Adding tests
mysql-test/t/func_regexp.test:
Adding tests
sql/item_cmpfunc.cc:
- Adding new method Item_func_regex::regcomp()
to share more code between fix_fields() and val_int()
- Adding conversion from ASCII-incompatible charsets like UCS2
to UTF8, because the "regex" library does not support these charsets
- Additional optimization: calculate flags for regcomp only
once in fix_fields, instead of on every regcomp() call
sql/item_cmpfunc.h:
Adding prototypes for new members and methods
mysql-test/include/ctype_regex.inc:
New BitKeeper file ``mysql-test/include/ctype_regex.inc''
Moving common regular expression tests into a separate
file and including it into func_regexp and into many ctype_xxx tests.
Diffstat (limited to 'sql/item_cmpfunc.cc')
-rw-r--r-- | sql/item_cmpfunc.cc | 122 |
1 files changed, 72 insertions, 50 deletions
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 86eb10d50b0..51b3e8cda6b 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -4226,6 +4226,51 @@ void Item_func_like::cleanup() #ifdef USE_REGEX bool +Item_func_regex::regcomp(bool send_error) +{ + char buff[MAX_FIELD_WIDTH]; + String tmp(buff,sizeof(buff),&my_charset_bin); + String *res= args[1]->val_str(&tmp); + int error; + + if (args[1]->null_value) + return TRUE; + + if (regex_compiled) + { + if (!stringcmp(res, &prev_regexp)) + return FALSE; + prev_regexp.copy(*res); + my_regfree(&preg); + regex_compiled= 0; + } + + if (cmp_collation.collation != regex_lib_charset) + { + /* Convert UCS2 strings to UTF8 */ + uint dummy_errors; + if (conv.copy(res->ptr(), res->length(), res->charset(), + regex_lib_charset, &dummy_errors)) + return TRUE; + res= &conv; + } + + if ((error= my_regcomp(&preg, res->c_ptr(), + regex_lib_flags, regex_lib_charset))) + { + if (send_error) + { + (void) my_regerror(error, &preg, buff, sizeof(buff)); + my_error(ER_REGEXP_ERROR, MYF(0), buff); + } + return TRUE; + } + regex_compiled= 1; + return FALSE; +} + + +bool Item_func_regex::fix_fields(THD *thd, Item **ref) { DBUG_ASSERT(fixed == 0); @@ -4241,34 +4286,33 @@ Item_func_regex::fix_fields(THD *thd, Item **ref) if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV, 1)) return TRUE; + regex_lib_flags= (cmp_collation.collation->state & + (MY_CS_BINSORT | MY_CS_CSSORT)) ? + REG_EXTENDED | REG_NOSUB : + REG_EXTENDED | REG_NOSUB | REG_ICASE; + /* + If the case of UCS2 and other non-ASCII character sets, + we will convert patterns and strings to UTF8. + */ + regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ? 
+ &my_charset_utf8_general_ci : + cmp_collation.collation; + used_tables_cache=args[0]->used_tables() | args[1]->used_tables(); not_null_tables_cache= (args[0]->not_null_tables() | args[1]->not_null_tables()); const_item_cache=args[0]->const_item() && args[1]->const_item(); if (!regex_compiled && args[1]->const_item()) { - char buff[MAX_FIELD_WIDTH]; - String tmp(buff,sizeof(buff),&my_charset_bin); - String *res=args[1]->val_str(&tmp); if (args[1]->null_value) { // Will always return NULL maybe_null=1; return FALSE; } - int error; - if ((error= my_regcomp(&preg,res->c_ptr(), - ((cmp_collation.collation->state & - (MY_CS_BINSORT | MY_CS_CSSORT)) ? - REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE), - cmp_collation.collation))) - { - (void) my_regerror(error,&preg,buff,sizeof(buff)); - my_error(ER_REGEXP_ERROR, MYF(0), buff); + if (regcomp(TRUE)) return TRUE; - } - regex_compiled=regex_is_const=1; - maybe_null=args[0]->maybe_null; + regex_is_const= 1; + maybe_null= args[0]->maybe_null; } else maybe_null=1; @@ -4281,47 +4325,25 @@ longlong Item_func_regex::val_int() { DBUG_ASSERT(fixed == 1); char buff[MAX_FIELD_WIDTH]; - String *res, tmp(buff,sizeof(buff),&my_charset_bin); + String tmp(buff,sizeof(buff),&my_charset_bin); + String *res= args[0]->val_str(&tmp); - res=args[0]->val_str(&tmp); - if (args[0]->null_value) - { - null_value=1; + if ((null_value= (args[0]->null_value || + (!regex_is_const && regcomp(FALSE))))) return 0; - } - if (!regex_is_const) - { - char buff2[MAX_FIELD_WIDTH]; - String *res2, tmp2(buff2,sizeof(buff2),&my_charset_bin); - res2= args[1]->val_str(&tmp2); - if (args[1]->null_value) + if (cmp_collation.collation != regex_lib_charset) + { + /* Convert UCS2 strings to UTF8 */ + uint dummy_errors; + if (conv.copy(res->ptr(), res->length(), res->charset(), + regex_lib_charset, &dummy_errors)) { - null_value=1; + null_value= 1; return 0; } - if (!regex_compiled || stringcmp(res2,&prev_regexp)) - { - prev_regexp.copy(*res2); - if 
(regex_compiled) - { - my_regfree(&preg); - regex_compiled=0; - } - if (my_regcomp(&preg,res2->c_ptr_safe(), - ((cmp_collation.collation->state & - (MY_CS_BINSORT | MY_CS_CSSORT)) ? - REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE), - cmp_collation.collation)) - { - null_value=1; - return 0; - } - regex_compiled=1; - } + res= &conv; } - null_value=0; return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1; } |