diff options
-rw-r--r-- | BitKeeper/etc/collapsed | 1 | ||||
-rw-r--r-- | include/m_ctype.h | 12 | ||||
-rw-r--r-- | mysql-test/r/lowercase_table.result | 24 | ||||
-rw-r--r-- | mysql-test/r/rpl_ignore_table.result | 16 | ||||
-rw-r--r-- | mysql-test/t/lowercase_table.test | 20 | ||||
-rw-r--r-- | mysql-test/t/rpl_ignore_table-slave.opt | 2 | ||||
-rw-r--r-- | mysql-test/t/rpl_ignore_table.test | 22 | ||||
-rw-r--r-- | sql/sql_parse.cc | 20 | ||||
-rw-r--r-- | strings/ctype-bin.c | 3 | ||||
-rw-r--r-- | strings/ctype-mb.c | 24 | ||||
-rw-r--r-- | strings/ctype-simple.c | 18 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 8 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 135 |
13 files changed, 270 insertions, 35 deletions
diff --git a/BitKeeper/etc/collapsed b/BitKeeper/etc/collapsed index 224b77be41b..dd7fddf1de3 100644 --- a/BitKeeper/etc/collapsed +++ b/BitKeeper/etc/collapsed @@ -8,3 +8,4 @@ 4519a6c5BVUxEHTf5iJnjZkixMBs8g 451ab499rgdjXyOnUDqHu-wBDoS-OQ 452c6c6dAjuNghfc1ObZ_UQ5SCl85g +4538a7b0EbDHHkWPbIwxO6ZIDdg6Dg diff --git a/include/m_ctype.h b/include/m_ctype.h index 5f946a3b443..0d586030735 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -190,8 +190,8 @@ typedef struct my_charset_handler_st const unsigned char *s, const unsigned char *e); /* Functions for case and sort conversion */ - void (*caseup_str)(struct charset_info_st *, char *); - void (*casedn_str)(struct charset_info_st *, char *); + uint (*caseup_str)(struct charset_info_st *, char *); + uint (*casedn_str)(struct charset_info_st *, char *); uint (*caseup)(struct charset_info_st *, char *src, uint srclen, char *dst, uint dstlen); uint (*casedn)(struct charset_info_st *, char *src, uint srclen, @@ -324,8 +324,8 @@ extern uint my_instr_simple(struct charset_info_st *, /* Functions for 8bit */ -extern void my_caseup_str_8bit(CHARSET_INFO *, char *); -extern void my_casedn_str_8bit(CHARSET_INFO *, char *); +extern uint my_caseup_str_8bit(CHARSET_INFO *, char *); +extern uint my_casedn_str_8bit(CHARSET_INFO *, char *); extern uint my_caseup_8bit(CHARSET_INFO *, char *src, uint srclen, char *dst, uint dstlen); extern uint my_casedn_8bit(CHARSET_INFO *, char *src, uint srclen, @@ -415,8 +415,8 @@ int my_mbcharlen_8bit(CHARSET_INFO *, uint c); /* Functions for multibyte charsets */ -extern void my_caseup_str_mb(CHARSET_INFO *, char *); -extern void my_casedn_str_mb(CHARSET_INFO *, char *); +extern uint my_caseup_str_mb(CHARSET_INFO *, char *); +extern uint my_casedn_str_mb(CHARSET_INFO *, char *); extern uint my_caseup_mb(CHARSET_INFO *, char *src, uint srclen, char *dst, uint dstlen); extern uint my_casedn_mb(CHARSET_INFO *, char *src, uint srclen, diff --git a/mysql-test/r/lowercase_table.result b/mysql-test/r/lowercase_table.result index 7705961d08d..6df3cf61ddb 100644 --- a/mysql-test/r/lowercase_table.result +++ b/mysql-test/r/lowercase_table.result @@ -84,3 +84,27 @@ create table t2 like T1; drop table t1, t2; show tables; Tables_in_test +set names utf8; +drop table if exists İ,İİ; +create table İ (s1 int); +show create table İ; +Table Create Table +İ CREATE TABLE `i` ( + `s1` int(11) default NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +show tables; +Tables_in_test +i +drop table İ; +create table İİ (s1 int); +show create table İİ; +Table Create Table +İİ CREATE TABLE `ii` ( + `s1` int(11) default NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +show tables; +Tables_in_test +ii +drop table İİ; +set names latin1; +End of 5.0 tests diff --git a/mysql-test/r/rpl_ignore_table.result b/mysql-test/r/rpl_ignore_table.result index 356a9dcb2f8..136cf5cc5eb 100644 --- a/mysql-test/r/rpl_ignore_table.result +++ b/mysql-test/r/rpl_ignore_table.result @@ -14,3 +14,19 @@ SELECT * FROM t4; a DROP TABLE t1; DROP TABLE t4; +DROP TABLE IF EXISTS t5; +CREATE TABLE t5 ( +word varchar(50) collate utf8_unicode_ci NOT NULL default '' +) DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +SET @@session.character_set_client=33,@@session.collation_connection=192; +CREATE TEMPORARY TABLE tmptbl504451f4258$1 (id INT NOT NULL) ENGINE=MEMORY; +INSERT INTO t5 (word) VALUES ('TEST’'); +SELECT HEX(word) FROM t5; +HEX(word) +54455354E28099 +SELECT HEX(word) FROM t5; +HEX(word) +54455354E28099 +SELECT * FROM tmptbl504451f4258$1; +ERROR 42S02: Table 'test.tmptbl504451f4258$1' doesn't exist +DROP TABLE t5; diff --git a/mysql-test/t/lowercase_table.test b/mysql-test/t/lowercase_table.test index 96437bc7636..31513f1bd06 100644 --- a/mysql-test/t/lowercase_table.test +++ b/mysql-test/t/lowercase_table.test @@ -85,3 +85,23 @@ drop table t1, t2; show tables; # End of 4.1 tests + + +# +# Bug#20404: SHOW CREATE TABLE fails with Turkish I +# +set names utf8; +--disable_warnings +drop table if exists İ,İİ; +--enable_warnings +create table İ (s1 int); +show create table İ; +show tables; +drop table İ; +create table İİ (s1 int); +show create table İİ; +show tables; +drop table İİ; +set names latin1; + +--echo End of 5.0 tests diff --git a/mysql-test/t/rpl_ignore_table-slave.opt b/mysql-test/t/rpl_ignore_table-slave.opt index cb49119bfcb..3aabbb2e0f5 100644 --- a/mysql-test/t/rpl_ignore_table-slave.opt +++ b/mysql-test/t/rpl_ignore_table-slave.opt @@ -1 +1 @@ ---replicate-ignore-table=test.t1 --replicate-ignore-table=test.t2 --replicate-ignore-table=test.t3 +--replicate-ignore-table=test.t1 --replicate-ignore-table=test.t2 --replicate-ignore-table=test.t3 --replicate-wild-ignore-table=%.tmptbl% diff --git a/mysql-test/t/rpl_ignore_table.test b/mysql-test/t/rpl_ignore_table.test index 84b0a4cde38..31191b9c386 100644 --- a/mysql-test/t/rpl_ignore_table.test +++ b/mysql-test/t/rpl_ignore_table.test @@ -28,3 +28,25 @@ connection master; DROP TABLE t1; DROP TABLE t4; sync_slave_with_master; + +# +# bug#22877 replication character sets get out of sync +# using replicate-wild-ignore-table +# +--disable_warnings +DROP TABLE IF EXISTS t5; +--enable_warnings +CREATE TABLE t5 ( + word varchar(50) collate utf8_unicode_ci NOT NULL default '' +) DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +SET @@session.character_set_client=33,@@session.collation_connection=192; +CREATE TEMPORARY TABLE tmptbl504451f4258$1 (id INT NOT NULL) ENGINE=MEMORY; +INSERT INTO t5 (word) VALUES ('TEST’'); +SELECT HEX(word) FROM t5; +sync_slave_with_master; +connection slave; +SELECT HEX(word) FROM t5; +--error 1146 +SELECT * FROM tmptbl504451f4258$1; +connection master; +DROP TABLE t5; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index fc7ed7ff673..51fa86affc2 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -2543,7 +2543,23 @@ mysql_execute_command(THD *thd) { /* we warn the slave SQL thread */ my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0)); - reset_one_shot_variables(thd); + if (thd->one_shot_set) + { + /* + It's ok to check thd->one_shot_set here: + + The charsets in a MySQL 5.0 slave can change by both a binlogged + SET ONE_SHOT statement and the event-internal charset setting, + and these two ways to change charsets do not seems to work + together. + + At least there seems to be problems in the rli cache for + charsets if we are using ONE_SHOT. Note that this is normally no + problem because either the >= 5.0 slave reads a 4.1 binlog (with + ONE_SHOT) *or* or 5.0 binlog (without ONE_SHOT) but never both." + */ + reset_one_shot_variables(thd); + } DBUG_RETURN(0); } } @@ -6358,7 +6374,7 @@ TABLE_LIST *st_select_lex::add_table_to_list(THD *thd, ptr->alias= alias_str; if (lower_case_table_names && table->table.length) - my_casedn_str(files_charset_info, table->table.str); + table->table.length= my_casedn_str(files_charset_info, table->table.str); ptr->table_name=table->table.str; ptr->table_name_length=table->table.length; ptr->lock_type= lock_type; diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 5758960ef6c..289b76c9e6e 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -211,9 +211,10 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), /* This function is used for all conversion functions */ -static void my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)), +static uint my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)), char *str __attribute__((unused))) { + return 0; } static uint my_case_bin(CHARSET_INFO *cs __attribute__((unused)), diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index c945164ac9c..c3848c64219 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -21,40 +21,44 @@ #ifdef USE_MB -void my_caseup_str_mb(CHARSET_INFO * cs, char *str) +uint my_caseup_str_mb(CHARSET_INFO * cs, char *str) { register uint32 l; - register uchar *map=cs->to_upper; + register uchar *map= cs->to_upper; + char *str_orig= str; while (*str) { /* Pointing after the '\0' is safe here. */ - if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen))) - str+=l; + if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen))) + str+= l; else { - *str=(char) map[(uchar)*str]; + *str= (char) map[(uchar)*str]; str++; } } + return str - str_orig; } -void my_casedn_str_mb(CHARSET_INFO * cs, char *str) +uint my_casedn_str_mb(CHARSET_INFO * cs, char *str) { register uint32 l; - register uchar *map=cs->to_lower; + register uchar *map= cs->to_lower; + char *str_orig= str; while (*str) { /* Pointing after the '\0' is safe here. */ - if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen))) - str+=l; + if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen))) + str+= l; else { - *str=(char) map[(uchar)*str]; + *str= (char) map[(uchar)*str]; str++; } } + return str - str_orig; } uint my_caseup_mb(CHARSET_INFO * cs, char *src, uint srclen, diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 9b45d5a03b7..9d10ba82114 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -188,20 +188,26 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length, } -void my_caseup_str_8bit(CHARSET_INFO * cs,char *str) +uint my_caseup_str_8bit(CHARSET_INFO * cs,char *str) { - register uchar *map=cs->to_upper; - while ((*str = (char) map[(uchar) *str]) != 0) + register uchar *map= cs->to_upper; + char *str_orig= str; + while ((*str= (char) map[(uchar) *str]) != 0) str++; + return str - str_orig; } -void my_casedn_str_8bit(CHARSET_INFO * cs,char *str) + +uint my_casedn_str_8bit(CHARSET_INFO * cs,char *str) { - register uchar *map=cs->to_lower; - while ((*str = (char) map[(uchar)*str]) != 0) + register uchar *map= cs->to_lower; + char *str_orig= str; + while ((*str= (char) map[(uchar) *str]) != 0) str++; + return str - str_orig; } + uint my_caseup_8bit(CHARSET_INFO * cs, char *src, uint srclen, char *dst __attribute__((unused)), uint dstlen __attribute__((unused))) diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index df43eff3d73..5089db6bf48 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -159,13 +159,13 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, } -static void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)), +static uint my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)), char * s __attribute__((unused))) { + return 0; } - static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen, char *dst __attribute__((unused)), uint dstlen __attribute__((unused))) @@ -188,9 +188,11 @@ static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen, return srclen; } -static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)), + +static uint my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)), char * s __attribute__((unused))) { + return 0; } diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 6ed7736e24a..8a4ed48bef5 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2047,6 +2047,52 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)), return MY_CS_ILSEQ; } + +/* + The same as above, but without range check + for example, for a null-terminated string +*/ +static int my_utf8_uni_no_range(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t * pwc, const uchar *s) +{ + unsigned char c; + + c= s[0]; + if (c < 0x80) + { + *pwc = c; + return 1; + } + + if (c < 0xc2) + return MY_CS_ILSEQ; + + if (c < 0xe0) + { + if (!((s[1] ^ 0x80) < 0x40)) + return MY_CS_ILSEQ; + + *pwc = ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80); + return 2; + } + + if (c < 0xf0) + { + if (!((s[1] ^ 0x80) < 0x40 && + (s[2] ^ 0x80) < 0x40 && + (c >= 0xe1 || s[1] >= 0xa0))) + return MY_CS_ILSEQ; + + *pwc= ((my_wc_t) (c & 0x0f) << 12) | + ((my_wc_t) (s[1] ^ 0x80) << 6) | + (my_wc_t) (s[2] ^ 0x80); + + return 3; + } + return MY_CS_ILSEQ; +} + + static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) , my_wc_t wc, uchar *r, uchar *e) { @@ -2093,6 +2139,34 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) , } +/* + The same as above, but without range check. +*/ +static int my_uni_utf8_no_range(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *r) +{ + int count; + + if (wc < 0x80) + count= 1; + else if (wc < 0x800) + count= 2; + else if (wc < 0x10000) + count= 3; + else + return MY_CS_ILUNI; + + switch (count) + { + /* Fall through all cases!!! */ + case 3: r[2]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0x800; + case 2: r[1]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0xc0; + case 1: r[0]= (uchar) wc; + } + return count; +} + + static uint my_caseup_utf8(CHARSET_INFO *cs, char *src, uint srclen, char *dst, uint dstlen) { @@ -2143,10 +2217,26 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, uint slen, } -static void my_caseup_str_utf8(CHARSET_INFO * cs, char * s) +static uint my_caseup_str_utf8(CHARSET_INFO *cs, char *src) { - uint len= (uint) strlen(s); - my_caseup_utf8(cs, s, len, s, len); + my_wc_t wc; + int srcres, dstres; + char *dst= src, *dst0= src; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(cs->caseup_multiply == 1); + + while (*src && + (srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0) + { + int plane= (wc>>8) & 0xFF; + wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc; + if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0) + break; + src+= srcres; + dst+= dstres; + } + *dst= '\0'; + return (uint) (dst - dst0); } @@ -2172,10 +2262,43 @@ static uint my_casedn_utf8(CHARSET_INFO *cs, char *src, uint srclen, return (uint) (dst - dst0); } -static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s) + +static uint my_casedn_str_utf8(CHARSET_INFO *cs, char *src) { - uint len= (uint) strlen(s); - my_casedn_utf8(cs, s, len, s, len); + my_wc_t wc; + int srcres, dstres; + char *dst= src, *dst0= src; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(cs->casedn_multiply == 1); + + while (*src && + (srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0) + { + int plane= (wc>>8) & 0xFF; + wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc; + if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0) + break; + src+= srcres; + dst+= dstres; + } + + /* + In rare cases lower string can be shorter than + the original string, for example: + + "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE" + (which is 0xC4B0 in utf8, i.e. two bytes) + + is converted into + + "U+0069 LATIN SMALL LETTER I" + (which is 0x69 in utf8, i.e. one byte) + + So, we need to put '\0' terminator after converting. + */ + + *dst= '\0'; + return (uint) (dst - dst0); } |