summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--client/client_priv.h2
-rw-r--r--client/mysqldump.c61
-rw-r--r--include/m_ctype.h5
-rw-r--r--innobase/srv/srv0srv.c8
-rw-r--r--mysql-test/r/ctype_utf8.result9
-rw-r--r--mysql-test/r/union.result78
-rw-r--r--mysql-test/t/ctype_utf8.test8
-rw-r--r--mysql-test/t/union.test55
-rw-r--r--sql/item.cc4
-rw-r--r--sql/sql_union.cc20
-rw-r--r--strings/CHARSET_INFO.txt221
-rw-r--r--strings/ctype-ucs2.c166
-rw-r--r--strings/ctype-utf8.c168
13 files changed, 628 insertions, 177 deletions
diff --git a/client/client_priv.h b/client/client_priv.h
index 328c051905c..f16ec0e802b 100644
--- a/client/client_priv.h
+++ b/client/client_priv.h
@@ -45,5 +45,5 @@ enum options_client
OPT_COMPATIBLE, OPT_RECONNECT, OPT_DELIMITER, OPT_SECURE_AUTH,
OPT_OPEN_FILES_LIMIT, OPT_SET_CHARSET, OPT_CREATE_OPTIONS,
OPT_START_POSITION, OPT_STOP_POSITION, OPT_START_DATETIME, OPT_STOP_DATETIME,
- OPT_SIGINT_IGNORE
+ OPT_SIGINT_IGNORE, OPT_HEXBLOB
};
diff --git a/client/mysqldump.c b/client/mysqldump.c
index 56505afd235..1686278096b 100644
--- a/client/mysqldump.c
+++ b/client/mysqldump.c
@@ -81,7 +81,8 @@ static my_bool verbose=0,tFlag=0,cFlag=0,dFlag=0,quick= 1, extended_insert= 1,
opt_alldbs=0,opt_create_db=0,opt_first_slave=0,opt_set_charset,
opt_autocommit=0,opt_master_data,opt_disable_keys=1,opt_xml=0,
opt_delete_master_logs=0, tty_password=0,
- opt_single_transaction=0, opt_comments= 0, opt_compact= 0;
+ opt_single_transaction=0, opt_comments= 0, opt_compact= 0,
+ opt_hex_blob=0;
static ulong opt_max_allowed_packet, opt_net_buffer_length;
static MYSQL mysql_connection,*sock=0;
static char insert_pat[12 * 1024],*opt_password=0,*current_user=0,
@@ -316,6 +317,8 @@ static struct my_option my_long_options[] =
{"comments", 'i', "Write additional information.",
(gptr*) &opt_comments, (gptr*) &opt_comments, 0, GET_BOOL, NO_ARG,
1, 0, 0, 0, 0, 0},
+ {"hex-blob", OPT_HEXBLOB, "Dump BLOBs in HEX. this mode does not work with extended-insert",
+ (gptr*) &opt_hex_blob, (gptr*) &opt_hex_blob, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
@@ -1507,6 +1510,7 @@ static void dumpTable(uint numFields, char *table)
for (i = 0; i < mysql_num_fields(res); i++)
{
+ int is_blob;
if (!(field = mysql_fetch_field(res)))
{
sprintf(query,"%s: Not enough fields from table %s! Aborting.\n",
@@ -1515,6 +1519,17 @@ static void dumpTable(uint numFields, char *table)
error= EX_CONSCHECK;
goto err;
}
+
+ /*
+ 63 is the number of my_charset_bin. If charsetnr is not 63,
+ the column is not a BLOB but a TEXT column.
+ Only BLOB columns are dumped in hex.
+ */
+ is_blob= (opt_hex_blob && field->charsetnr == 63 &&
+ (field->type == FIELD_TYPE_BLOB ||
+ field->type == FIELD_TYPE_LONG_BLOB ||
+ field->type == FIELD_TYPE_MEDIUM_BLOB ||
+ field->type == FIELD_TYPE_TINY_BLOB)) ? 1 : 0;
if (extended_insert)
{
ulong length = lengths[i];
@@ -1535,12 +1550,28 @@ static void dumpTable(uint numFields, char *table)
error= EX_EOM;
goto err;
}
- dynstr_append(&extended_row,"'");
- extended_row.length +=
- mysql_real_escape_string(&mysql_connection,
- &extended_row.str[extended_row.length],row[i],length);
- extended_row.str[extended_row.length]='\0';
- dynstr_append(&extended_row,"'");
+ if (opt_hex_blob && is_blob)
+ {
+ ulong counter;
+ unsigned char *ptr= row[i];
+ dynstr_append(&extended_row, "0x");
+ for (counter = 0; counter < lengths[i]; counter++)
+ {
+ char xx[3];
+ sprintf(xx, "%02X", ptr[counter]);
+ dynstr_append(&extended_row, xx);
+ }
+ }
+ else
+ {
+ dynstr_append(&extended_row,"'");
+ extended_row.length +=
+ mysql_real_escape_string(&mysql_connection,
+ &extended_row.str[extended_row.length],
+ row[i],length);
+ extended_row.str[extended_row.length]='\0';
+ dynstr_append(&extended_row,"'");
+ }
}
else
{
@@ -1591,8 +1622,20 @@ static void dumpTable(uint numFields, char *table)
print_quoted_xml(md_result_file, row[i], lengths[i]);
fputs("</field>\n", md_result_file);
}
- else
- unescape(md_result_file, row[i], lengths[i]);
+ else if (opt_hex_blob && is_blob)
+ { /* sakaik got this idea. */
+ ulong counter;
+ char xx[4];
+ unsigned char *ptr= row[i];
+ fputs("0x", md_result_file);
+ for (counter = 0; counter < lengths[i]; counter++)
+ {
+ sprintf(xx, "%02X", ptr[counter]);
+ fputs(xx, md_result_file);
+ }
+ }
+ else
+ unescape(md_result_file, row[i], lengths[i]);
}
else
{
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 1f42b514a1b..ddc21070547 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -365,6 +365,11 @@ uint my_instr_mb(struct charset_info_st *,
const char *s, uint s_length,
my_match_t *match, uint nmatch);
+int my_wildcmp_unicode(CHARSET_INFO *cs,
+ const char *str, const char *str_end,
+ const char *wildstr, const char *wildend,
+ int escape, int w_one, int w_many,
+ MY_UNICASE_INFO **weights);
extern my_bool my_parse_charset_xml(const char *bug, uint len,
int (*add)(CHARSET_INFO *cs));
diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
index d913d77fdfc..b34ae9f36f4 100644
--- a/innobase/srv/srv0srv.c
+++ b/innobase/srv/srv0srv.c
@@ -951,7 +951,13 @@ retry:
trx->op_info = "sleeping before joining InnoDB queue";
- os_thread_sleep(50000);
+ /* Peter Zaitsev suggested that we take the sleep away
+ altogether. But the sleep may be good in pathological
+ situations of lots of thread switches. Simply put some
+ threads aside for a while to reduce the number of thread
+ switches. */
+
+ os_thread_sleep(10000);
trx->op_info = "";
diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result
index 2e8bbc8fa92..e65eb96cb68 100644
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@@ -63,6 +63,15 @@ select 'A' like 'a' collate utf8_bin;
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
_utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%')
1
+select convert(_latin1'Günter André' using utf8) like CONVERT(_latin1'GÜNTER%' USING utf8);
+convert(_latin1'Günter André' using utf8) like CONVERT(_latin1'GÜNTER%' USING utf8)
+1
+select CONVERT(_koi8r'×ÁÓÑ' USING utf8) LIKE CONVERT(_koi8r'÷áóñ' USING utf8);
+CONVERT(_koi8r'×ÁÓÑ' USING utf8) LIKE CONVERT(_koi8r'÷áóñ' USING utf8)
+1
+select CONVERT(_koi8r'÷áóñ' USING utf8) LIKE CONVERT(_koi8r'×ÁÓÑ' USING utf8);
+CONVERT(_koi8r'÷áóñ' USING utf8) LIKE CONVERT(_koi8r'×ÁÓÑ' USING utf8)
+1
SELECT 'a' = 'a ';
'a' = 'a '
1
diff --git a/mysql-test/r/union.result b/mysql-test/r/union.result
index fbd4f8e11dc..7820cd1d6ff 100644
--- a/mysql-test/r/union.result
+++ b/mysql-test/r/union.result
@@ -1033,3 +1033,81 @@ a
No
aaa,bbb
drop table t1,t2,t3,t4;
+create table t1 as
+(select _latin1'test') union
+(select _latin1'TEST') union
+(select _latin1'TeST');
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `test` char(4) NOT NULL default ''
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+select count(*) from t1;
+count(*)
+1
+drop table t1;
+create table t1 as
+(select _latin1'test' collate latin1_bin) union
+(select _latin1'TEST') union
+(select _latin1'TeST');
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `_latin1'test' collate latin1_bin` char(4) character set latin1 collate latin1_bin NOT NULL default ''
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+select count(*) from t1;
+count(*)
+3
+drop table t1;
+create table t1 as
+(select _latin1'test') union
+(select _latin1'TEST' collate latin1_bin) union
+(select _latin1'TeST');
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `test` char(4) character set latin1 collate latin1_bin NOT NULL default ''
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+select count(*) from t1;
+count(*)
+3
+drop table t1;
+create table t1 as
+(select _latin1'test') union
+(select _latin1'TEST') union
+(select _latin1'TeST' collate latin1_bin);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `test` char(4) character set latin1 collate latin1_bin NOT NULL default ''
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+select count(*) from t1;
+count(*)
+3
+drop table t1;
+create table t2 (
+a char character set latin1 collate latin1_swedish_ci,
+b char character set latin1 collate latin1_bin);
+create table t1 as
+(select a from t2) union
+(select b from t2);
+ERROR HY000: Illegal mix of collations for operation 'UNION'
+create table t1 as
+(select a collate latin1_german1_ci from t2) union
+(select b from t2);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a collate latin1_german1_ci` char(1) character set latin1 collate latin1_german1_ci default NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 as
+(select a from t2) union
+(select b collate latin1_german1_ci from t2);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` char(1) character set latin1 collate latin1_german1_ci default NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+drop table t1;
+drop table t2;
diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test
index c74bb59ae6b..238cd6daef3 100644
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@@ -33,6 +33,14 @@ select 'A' like 'a';
select 'A' like 'a' collate utf8_bin;
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
+# Bug #6040: can't retrieve records with umlaut
+# characters in case insensitive manner.
+# Case insensitive search LIKE comparison
+# was broken for multibyte characters:
+select convert(_latin1'Günter André' using utf8) like CONVERT(_latin1'GÜNTER%' USING utf8);
+select CONVERT(_koi8r'×ÁÓÑ' USING utf8) LIKE CONVERT(_koi8r'÷áóñ' USING utf8);
+select CONVERT(_koi8r'÷áóñ' USING utf8) LIKE CONVERT(_koi8r'×ÁÓÑ' USING utf8);
+
#
# Check the following:
# "a" == "a "
diff --git a/mysql-test/t/union.test b/mysql-test/t/union.test
index c5e72e85835..6e16a2b02aa 100644
--- a/mysql-test/t/union.test
+++ b/mysql-test/t/union.test
@@ -595,3 +595,58 @@ select a as a from t3 union select "1";
select a as a from t4 union select a from t3;
select a as a from t1 union select a from t4;
drop table t1,t2,t3,t4;
+
+#
+# Bug #6139 UNION doesn't understand collate in the column of second select
+#
+create table t1 as
+(select _latin1'test') union
+(select _latin1'TEST') union
+(select _latin1'TeST');
+show create table t1;
+select count(*) from t1;
+drop table t1;
+
+create table t1 as
+(select _latin1'test' collate latin1_bin) union
+(select _latin1'TEST') union
+(select _latin1'TeST');
+show create table t1;
+select count(*) from t1;
+drop table t1;
+
+create table t1 as
+(select _latin1'test') union
+(select _latin1'TEST' collate latin1_bin) union
+(select _latin1'TeST');
+show create table t1;
+select count(*) from t1;
+drop table t1;
+
+create table t1 as
+(select _latin1'test') union
+(select _latin1'TEST') union
+(select _latin1'TeST' collate latin1_bin);
+show create table t1;
+select count(*) from t1;
+drop table t1;
+
+create table t2 (
+a char character set latin1 collate latin1_swedish_ci,
+b char character set latin1 collate latin1_bin);
+--error 1271
+create table t1 as
+(select a from t2) union
+(select b from t2);
+create table t1 as
+(select a collate latin1_german1_ci from t2) union
+(select b from t2);
+show create table t1;
+drop table t1;
+create table t1 as
+(select a from t2) union
+(select b collate latin1_german1_ci from t2);
+show create table t1;
+drop table t1;
+drop table t2;
+
diff --git a/sql/item.cc b/sql/item.cc
index 0366ea29485..304579d65a2 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -2558,8 +2558,8 @@ bool Item_type_holder::join_types(THD *thd, Item *item)
if (use_new_field || use_expression_type ||
(new_result_type != item_type) || (new_length > max_length) ||
(!maybe_null && item->maybe_null) ||
- (item_type == STRING_RESULT &&
- !my_charset_same(collation.collation, item->collation.collation)))
+ (item_type == STRING_RESULT &&
+ collation.collation != item->collation.collation))
{
if (use_expression_type || item->type() != Item::FIELD_ITEM)
field_example= 0;
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index b46cfc05538..fc2d2a3a5e4 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -264,9 +264,27 @@ int st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
}
}
- // it is not single select
if (first_select->next_select())
{
+
+ // it is not single select
+
+ /*
+ Check that it was possible to aggregate
+ all collations together for UNION.
+ */
+ List_iterator_fast<Item> tp(types);
+ Item *type;
+ while ((type= tp++))
+ {
+ if (type->result_type() == STRING_RESULT &&
+ type->collation.derivation == DERIVATION_NONE)
+ {
+ my_error(ER_CANT_AGGREGATE_NCOLLATIONS, MYF(0), "UNION");
+ goto err;
+ }
+ }
+
union_result->tmp_table_param.field_count= types.elements;
if (!(table= create_tmp_table(thd_arg,
&union_result->tmp_table_param, types,
diff --git a/strings/CHARSET_INFO.txt b/strings/CHARSET_INFO.txt
new file mode 100644
index 00000000000..883000e7ade
--- /dev/null
+++ b/strings/CHARSET_INFO.txt
@@ -0,0 +1,221 @@
+
+CHARSET_INFO
+============
+A structure containing data for charset+collation pair implementation.
+
+Virtual functions which use this data are collected
+into separate structures MY_CHARSET_HANDLER and
+MY_COLLATION_HANDLER.
+
+
+typedef struct charset_info_st
+{
+ uint number;
+ uint primary_number;
+ uint binary_number;
+ uint state;
+
+ const char *csname;
+ const char *name;
+ const char *comment;
+
+ uchar *ctype;
+ uchar *to_lower;
+ uchar *to_upper;
+ uchar *sort_order;
+
+ uint16 *tab_to_uni;
+ MY_UNI_IDX *tab_from_uni;
+
+ uchar state_map[256];
+ uchar ident_map[256];
+
+ uint strxfrm_multiply;
+ uint mbminlen;
+ uint mbmaxlen;
+ char max_sort_char; /* For LIKE optimization */
+
+ MY_CHARSET_HANDLER *cset;
+ MY_COLLATION_HANDLER *coll;
+
+} CHARSET_INFO;
+
+
+CHARSET_INFO fields description:
+===============================
+
+
+Numbers (identifiers)
+---------------------
+
+number - an ID uniquely identifying this charset+collation pair.
+
+primary_number - ID of a charset+collation pair, which consists
+of the same character set and the default collation of this
+character set. Not really used now. Intended to optimize some
+parts of the code where we need to find the default collation
+using its non-default counterpart for the given character set.
+
+binary_number - ID of a charset+collation pair, which consists
+of the same character set and the binary collation of this
+character set. Not really used now.
+
+Names
+-----
+
+ csname - name of the character set for this charset+collation pair.
+ name - name of the collation for this charset+collation pair.
+ comment - a text comment, displayed in the "Description" column of
+ SHOW CHARACTER SET output.
+
+Conversion tables
+-----------------
+
+ ctype - pointer to array[257] of "type of characters"
+ bit mask for each character, e.g. if a
+ character is a digit or a letter or a separator, etc.
+ to_lower - pointer to array[256] used in LCASE()
+ to_upper - pointer to array[256] used in UCASE()
+ sort_order - pointer to array[256] used for strings comparison
+
+
+
+Unicode conversion data
+-----------------------
+For 8bit character sets:
+
+tab_to_uni : array[256] of charset->Unicode translation
+tab_from_uni: a structure for Unicode->charset translation
+
+Non-8 bit charsets have their own structures per charset
+hidden in the corresponding ctype-xxx.c file and don't use
+tab_to_uni and tab_from_uni tables.
+
+
+Parser maps
+-----------
+state_map[]
+ident_map[]
+
+ These maps are used to quickly identify whether a character is
+part of an identifier, a digit, a special character,
+or part of another SQL language lexical item.
+
+Probably can be combined with ctype array in the future.
+But for some reasons these two arrays are used in the parser,
+while a separate ctype[] array is used in the other part of the
+code, like fulltext, etc.
+
+
+Misc fields
+-----------
+
+ strxfrm_multiply - how many times a sort key (i.e. a string
+ which can be passed into memcmp() for comparison)
+ can be longer than the original string.
+ Usually it is 1. For some complex
+ collations it can be bigger. For example
+ in latin1_german2_ci, a sort key is up to
+ twice as long as the original string.
+ e.g. Letter 'A' with two dots above is
+ substituted with 'AE'.
+ mbminlen - minimum multibyte sequence length.
+ Now always 1 except ucs2. For ucs2
+ it is 2.
+ mbmaxlen - maximum multibyte sequence length.
+ 1 for 8bit charsets. Can be also 2 or 3.
+
+
+
+MY_CHARSET_HANDLER
+==================
+
+MY_CHARSET_HANDLER is a collection of character-set
+related routines, defined in m_ctype.h. It has the
+following set of functions:
+
+Multibyte routines
+------------------
+ismbchar() - detects if the given string is a multibyte sequence
+mbcharlen() - returns length of multibyte sequence starting with
+ the given character
+numchars() - returns number of characters in the given string, e.g.
+ in SQL function CHAR_LENGTH().
+charpos() - calculates the offset of the given position in the string.
+ Used in SQL functions LEFT(), RIGHT(), SUBSTRING(),
+ INSERT()
+
+well_formed_length()
+ - finds the length of correctly formed multibyte beginning.
+ Used in INSERTs to cut a beginning of the given string
+ which is
+ a) "well formed" according to the given character set.
+ b) can fit into the given data type
+ Terminates the string in the good position, taking into account
+ multibyte character boundaries.
+
+lengthsp() - returns the length of the given string without trailing spaces.
+
+
+Unicode conversion routines
+---------------------------
+mb_wc - converts the left multibyte sequence into its Unicode code.
+wc_mb - converts the given Unicode code into multibyte sequence.
+
+
+Case and sort conversion
+------------------------
+caseup_str - converts the given 0-terminated string into the upper case
+casedn_str - converts the given 0-terminated string into the lower case
+caseup - converts the given string into the upper case using length
+casedn - converts the given string into the lower case using length
+
+Number-to-string conversion routines
+------------------------------------
+snprintf()
+long10_to_str()
+longlong10_to_str()
+
+The names are pretty self-descriptive.
+
+String padding routines
+-----------------------
+fill() - writes the given Unicode value into the given string
+ with the given length. Used to pad the string, usually
+ with space character, according to the given charset.
+
+String-to-number conversion routines
+------------------------------------
+strntol()
+strntoul()
+strntoll()
+strntoull()
+strntod()
+
+These functions do almost the same thing as their
+STDLIB counterparts, but also:
+ - accept length instead of 0-terminator
+ - and are character set dependent
+
+Simple scanner routines
+-----------------------
+scan() - to skip leading spaces in the given string.
+ Used when a string value is inserted into a numeric field.
+
+
+
+MY_COLLATION_HANDLER
+====================
+strnncoll() - compares two strings according to the given collation
+strnncollsp() - like the above but ignores trailing spaces
+strnxfrm() - makes a sort key suitable for memcmp() corresponding
+ to the given string
+like_range() - creates a LIKE range, for optimizer
+wildcmp() - wildcard comparison, for LIKE
+strcasecmp() - 0-terminated string comparison
+instr() - finds the first substring occurrence in the string
+hash_sort() - calculates hash value taking into account
+ the collation rules, e.g. case-insensitivity,
+ accent sensitivity, etc.
+
+ \ No newline at end of file
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 3247e1d7424..851c2044f47 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1231,172 +1231,14 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
-/*
-** Compare string against string with wildcard
-** 0 if matched
-** -1 if not matched with wildcard
-** 1 if matched with wildcard
-*/
-
-static
-int my_wildcmp_ucs2(CHARSET_INFO *cs,
- const char *str,const char *str_end,
- const char *wildstr,const char *wildend,
- int escape, int w_one, int w_many,
- MY_UNICASE_INFO **weights)
-{
- int result= -1; /* Not found, using wildcards */
- my_wc_t s_wc, w_wc;
- int scan, plane;
-
- while (wildstr != wildend)
- {
-
- while (1)
- {
- scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
- (const uchar*)wildend);
- if (scan <= 0)
- return 1;
-
- if (w_wc == (my_wc_t)escape)
- {
- wildstr+= scan;
- scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
- (const uchar*)wildend);
- if (scan <= 0)
- return 1;
- }
-
- if (w_wc == (my_wc_t)w_many)
- {
- result= 1; /* Found an anchor char */
- break;
- }
-
- wildstr+= scan;
- scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end);
- if (scan <=0)
- return 1;
- str+= scan;
-
- if (w_wc == (my_wc_t)w_one)
- {
- result= 1; /* Found an anchor char */
- }
- else
- {
- if (weights)
- {
- plane=(s_wc>>8) & 0xFF;
- s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
- plane=(w_wc>>8) & 0xFF;
- w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
- }
- if (s_wc != w_wc)
- return 1; /* No match */
- }
- if (wildstr == wildend)
- return (str != str_end); /* Match if both are at end */
- }
-
-
- if (w_wc == (my_wc_t)w_many)
- { /* Found w_many */
-
- /* Remove any '%' and '_' from the wild search string */
- for ( ; wildstr != wildend ; )
- {
- scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
- (const uchar*)wildend);
- if (scan <= 0)
- return 1;
-
- if (w_wc == (my_wc_t)w_many)
- {
- wildstr+= scan;
- continue;
- }
-
- if (w_wc == (my_wc_t)w_one)
- {
- wildstr+= scan;
- scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str,
- (const uchar*)str_end);
- if (scan <=0)
- return 1;
- str+= scan;
- continue;
- }
- break; /* Not a wild character */
- }
-
- if (wildstr == wildend)
- return 0; /* Ok if w_many is last */
-
- if (str == str_end)
- return -1;
-
- scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
- (const uchar*)wildend);
- if (scan <= 0)
- return 1;
-
- if (w_wc == (my_wc_t)escape)
- {
- wildstr+= scan;
- scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
- (const uchar*)wildend);
- if (scan <= 0)
- return 1;
- }
-
- while (1)
- {
- /* Skip until the first character from wildstr is found */
- while (str != str_end)
- {
- scan= my_ucs2_uni(cs,&s_wc, (const uchar*)str,
- (const uchar*)str_end);
- if (scan <= 0)
- return 1;
- if (weights)
- {
- plane=(s_wc>>8) & 0xFF;
- s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
- plane=(w_wc>>8) & 0xFF;
- w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
- }
-
- if (s_wc == w_wc)
- break;
- str+= scan;
- }
- if (str == str_end)
- return -1;
-
- result= my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,escape,
- w_one,w_many,weights);
-
- if (result <= 0)
- return result;
-
- str+= scan;
- }
- }
- }
- return (str != str_end ? 1 : 0);
-}
-
-
static
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
- return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
- escape,w_one,w_many,uni_plane);
+ return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
+ escape,w_one,w_many,uni_plane);
}
@@ -1406,8 +1248,8 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
- return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
- escape,w_one,w_many,NULL);
+ return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
+ escape,w_one,w_many,NULL);
}
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index fd6610b72b1..f5d40fb8ded 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1518,6 +1518,161 @@ MY_UNICASE_INFO *uni_plane[256]={
};
+
+/*
+** Compare string against string with wildcard
+** This function is used in UTF8 and UCS2
+**
+** 0 if matched
+** -1 if not matched: str ran out while searching past a w_many wildcard
+** 1 if not matched otherwise (mismatch, leftover str, or mb_wc() failure)
+*/
+
+int my_wildcmp_unicode(CHARSET_INFO *cs,
+ const char *str,const char *str_end,
+ const char *wildstr,const char *wildend,
+ int escape, int w_one, int w_many,
+ MY_UNICASE_INFO **weights)
+{
+ int result= -1; /* Not found, using wildcards */
+ my_wc_t s_wc, w_wc;
+ int scan, plane;
+ int (*mb_wc)(struct charset_info_st *cs, my_wc_t *wc,
+ const unsigned char *s,const unsigned char *e);
+ mb_wc= cs->cset->mb_wc;
+
+ while (wildstr != wildend)
+ {
+ while (1)
+ {
+ if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+ (const uchar*)wildend)) <= 0)
+ return 1;
+
+ if (w_wc == (my_wc_t)escape)
+ {
+ wildstr+= scan;
+ if ((scan= mb_wc(cs,&w_wc, (const uchar*)wildstr,
+ (const uchar*)wildend)) <= 0)
+ return 1;
+ }
+
+ if (w_wc == (my_wc_t)w_many)
+ {
+ result= 1; /* Found an anchor char */
+ break;
+ }
+
+ wildstr+= scan;
+ if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
+ (const uchar*)str_end)) <=0)
+ return 1;
+ str+= scan;
+
+ if (w_wc == (my_wc_t)w_one)
+ {
+ result= 1; /* Found an anchor char */
+ }
+ else
+ {
+ if (weights)
+ {
+ plane=(s_wc>>8) & 0xFF;
+ s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
+ plane=(w_wc>>8) & 0xFF;
+ w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
+ }
+ if (s_wc != w_wc)
+ return 1; /* No match */
+ }
+ if (wildstr == wildend)
+ return (str != str_end); /* Match if both are at end */
+ }
+
+
+ if (w_wc == (my_wc_t)w_many)
+ { /* Found w_many */
+
+ /* Remove any '%' and '_' from the wild search string */
+ for ( ; wildstr != wildend ; )
+ {
+ if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+ (const uchar*)wildend)) <= 0)
+ return 1;
+
+ if (w_wc == (my_wc_t)w_many)
+ {
+ wildstr+= scan;
+ continue;
+ }
+
+ if (w_wc == (my_wc_t)w_one)
+ {
+ wildstr+= scan;
+ if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
+ (const uchar*)str_end)) <=0)
+ return 1;
+ str+= scan;
+ continue;
+ }
+ break; /* Not a wild character */
+ }
+
+ if (wildstr == wildend)
+ return 0; /* Ok if w_many is last */
+
+ if (str == str_end)
+ return -1;
+
+ if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+ (const uchar*)wildend)) <=0)
+ return 1;
+
+ if (w_wc == (my_wc_t)escape)
+ {
+ wildstr+= scan;
+ if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+ (const uchar*)wildend)) <=0)
+ return 1;
+ }
+
+ while (1)
+ {
+ /* Skip until the first character from wildstr is found */
+ while (str != str_end)
+ {
+ if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
+ (const uchar*)str_end)) <=0)
+ return 1;
+ if (weights)
+ {
+ plane=(s_wc>>8) & 0xFF;
+ s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
+ plane=(w_wc>>8) & 0xFF;
+ w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
+ }
+
+ if (s_wc == w_wc)
+ break;
+ str+= scan;
+ }
+ if (str == str_end)
+ return -1;
+
+ result= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
+ escape, w_one, w_many,
+ weights);
+
+ if (result <= 0)
+ return result;
+
+ str+= scan;
+ }
+ }
+ }
+ return (str != str_end ? 1 : 0);
+}
+
#endif
@@ -1992,6 +2147,17 @@ static int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
return my_strncasecmp_utf8(cs, s, t, len);
}
+static
+int my_wildcmp_utf8(CHARSET_INFO *cs,
+ const char *str,const char *str_end,
+ const char *wildstr,const char *wildend,
+ int escape, int w_one, int w_many)
+{
+ return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
+ escape,w_one,w_many,uni_plane);
+}
+
+
static int my_strnxfrm_utf8(CHARSET_INFO *cs,
uchar *dst, uint dstlen,
const uchar *src, uint srclen)
@@ -2060,7 +2226,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncollsp_utf8,
my_strnxfrm_utf8,
my_like_range_mb,
- my_wildcmp_mb,
+ my_wildcmp_utf8,
my_strcasecmp_utf8,
my_instr_mb,
my_hash_sort_utf8