diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | include/m_ctype.h | 3 | ||||
-rw-r--r-- | mysql-test/r/ctype_ldml.result | 37 | ||||
-rw-r--r-- | mysql-test/std_data/ldml/Index.xml (renamed from mysql-test/std_data/Index.xml) | 6 | ||||
-rw-r--r-- | mysql-test/std_data/ldml/ascii2.xml | 121 | ||||
-rw-r--r-- | mysql-test/std_data/ldml/latin1.xml (renamed from mysql-test/std_data/latin1.xml) | 23 | ||||
-rw-r--r-- | mysql-test/t/ctype_ldml-master.opt | 2 | ||||
-rw-r--r-- | mysql-test/t/ctype_ldml.test | 29 | ||||
-rw-r--r-- | mysys/charset.c | 16 | ||||
-rw-r--r-- | strings/conf_to_src.c | 16 | ||||
-rw-r--r-- | strings/ctype-simple.c | 72 | ||||
-rw-r--r-- | strings/ctype.c | 42 | ||||
-rw-r--r-- | strings/strings_def.h | 5 |
13 files changed, 299 insertions, 74 deletions
diff --git a/.gitignore b/.gitignore index 279d3f59dc5..ef26322a19d 100644 --- a/.gitignore +++ b/.gitignore @@ -198,6 +198,7 @@ storage/tokudb/PerconaFT/tools/tokudb_load storage/tokudb/PerconaFT/tools/tokuftdump storage/tokudb/PerconaFT/tools/tokuft_logprint storage/tokudb/PerconaFT/xz/ +strings/conf_to_src support-files/MySQL-shared-compat.spec support-files/binary-configure support-files/config.huge.ini diff --git a/include/m_ctype.h b/include/m_ctype.h index 7a688f76acb..796dd630aa7 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -865,7 +865,6 @@ void my_string_metadata_get(MY_STRING_METADATA *metadata, CHARSET_INFO *cs, const char *str, size_t len); uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len); my_bool my_charset_is_ascii_based(CHARSET_INFO *cs); -my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs); uint my_charset_repertoire(CHARSET_INFO *cs); uint my_strxfrm_flag_normalize(uint flags, uint nlevels); @@ -875,8 +874,6 @@ size_t my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs, uchar *str, uchar *frmend, uchar *strend, uint nweights, uint flags, uint level); -my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs); - const MY_CONTRACTIONS *my_charset_get_contractions(CHARSET_INFO *cs, int level); diff --git a/mysql-test/r/ctype_ldml.result b/mysql-test/r/ctype_ldml.result index d333c03143b..05862c92cb3 100644 --- a/mysql-test/r/ctype_ldml.result +++ b/mysql-test/r/ctype_ldml.result @@ -5,7 +5,7 @@ with all Unicode character sets set names utf8; show variables like 'character_sets_dir%'; Variable_name Value -character_sets_dir MYSQL_TEST_DIR/std_data/ +character_sets_dir MYSQL_TEST_DIR/std_data/ldml/ show collation like 'utf8_phone_ci'; Collation Charset Id Default Compiled Sortlen utf8_phone_ci utf8 352 8 @@ -454,10 +454,13 @@ select "foo" = "foo " collate latin1_test; The following tests check that two-byte collation IDs work select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id; COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN +ascii2_general_ci ascii2 320 Yes 1 +ascii2_bin ascii2 321 1 utf8mb4_test_ci utf8mb4 326 8 utf16_test_ci utf16 327 8 utf8mb4_test_400_ci utf8mb4 328 8 latin1_test2 latin1 332 1 +latin1_test2_cs latin1 333 1 utf8_bengali_standard_ci utf8 336 8 utf8_bengali_traditional_ci utf8 337 8 utf8_implicit_weights_ci utf8 338 8 @@ -478,6 +481,7 @@ show collation like '%test%'; Collation Charset Id Default Compiled Sortlen latin1_test latin1 99 Yes 1 latin1_test2 latin1 332 1 +latin1_test2_cs latin1 333 1 utf8_test_ci utf8 353 8 ucs2_test_ci ucs2 358 8 utf8mb4_test_ci utf8mb4 326 8 @@ -1179,3 +1183,34 @@ ch w ducet 3700 FB80B700 FB80B700 3701 FB80B700 FB80B701 DROP TABLE t1; +# +# Testing that the MY_CS_PUREASCII flag is set properly +# +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2, b VARCHAR(10) CHARACTER SET latin1); +INSERT INTO t1 VALUES ('a','a'),('b','b'); +SELECT * FROM t1 WHERE a=b; +a b +a a +b b +ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin; +SELECT * FROM t1 WHERE a=b; +a b +a a +b b +DROP TABLE t1; +# +# Testing that the MY_CS_CSSORT flag is set properly +# +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test); +INSERT INTO t1 VALUES ('a'),('A'); +SELECT * FROM t1 WHERE a RLIKE 'a'; +a +a +A +DROP TABLE t1; +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test2_cs); +INSERT INTO t1 VALUES ('a'),('A'); +SELECT * FROM t1 WHERE a RLIKE 'a'; +a +a +DROP TABLE t1; diff --git a/mysql-test/std_data/Index.xml b/mysql-test/std_data/ldml/Index.xml index b66fdfee55c..66de0c725b7 100644 --- a/mysql-test/std_data/Index.xml +++ b/mysql-test/std_data/ldml/Index.xml @@ -318,6 +318,11 @@ </charset> + <charset name="ascii2"> + <collation name="ascii2_general_ci" id="320" flag="primary"/> + <collation name="ascii2_bin" id="321" flag="binary"/> + </charset> + <charset name="latin1"> <family>Western</family> <description>cp1252 West European</description> @@ -330,6 +335,7 @@ <alias>latin1</alias> <collation name="latin1_test" id="99" order="test"/> <collation name="latin1_test2" id="332" order="test"/> + <collation name="latin1_test2_cs" id="333"/> </charset> <charset name="utf8"> diff --git a/mysql-test/std_data/ldml/ascii2.xml b/mysql-test/std_data/ldml/ascii2.xml new file mode 100644 index 00000000000..f1936e020be --- /dev/null +++ b/mysql-test/std_data/ldml/ascii2.xml @@ -0,0 +1,121 @@ +<?xml version='1.0' encoding="utf-8"?> + +<charsets> + +<charset name="ascii2"> + +<ctype> +<map> + 00 + 20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20 + 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10 + 10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01 + 01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10 + 10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02 + 02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +</map> +</ctype> + + +<lower> +<map> + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F + 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F + 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF +</map> +</lower> + + +<upper> +<map> + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF +</map> +</upper> + + +<unicode> +<map> +0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F +0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F +0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F +0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F +0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F +0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F +0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F +0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 +</map> +</unicode> + + +<collation name="ascii2_general_ci"> +<map> + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F + 60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F + 50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F + 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F + 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F + A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF + B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF +</map> +</collation> + +<collation name="ascii2_bin" flag="binary"/> + +</charset> + +</charsets> diff --git a/mysql-test/std_data/latin1.xml b/mysql-test/std_data/ldml/latin1.xml index fd5197254e2..3e1d021c127 100644 --- a/mysql-test/std_data/latin1.xml +++ b/mysql-test/std_data/ldml/latin1.xml @@ -152,6 +152,29 @@ </map> </collation> + +<!-- This is a copy of latin1_general_cs --> +<collation name="latin1_test2_cs"> +<map> + 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F + 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F + 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F + 40 41 51 53 57 5B 65 67 69 6B 75 77 79 7B 7D 81 + 8F 91 93 95 98 9A A4 A6 A8 AA AF B3 B4 B5 B6 B7 + B8 42 52 54 58 5C 66 68 6A 6C 76 78 7A 7C 7E 82 + 90 92 94 96 99 9B A5 A7 A9 AB B0 B9 BA BB BC BF + C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF + D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF + E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF + F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF + 43 45 47 49 4B 4D 4F 55 5D 5F 61 63 6D 6F 71 73 + 59 7F 83 85 87 89 8B BD 8D 9C 9E A0 A2 AC B1 97 + 44 46 48 4A 4C 4E 50 56 5E 60 62 64 6E 70 72 74 + 5A 80 84 86 88 8A 8C BE 8E 9D 9F A1 A3 AD B2 AE +</map> +</collation> + </charset> </charsets> diff --git a/mysql-test/t/ctype_ldml-master.opt b/mysql-test/t/ctype_ldml-master.opt index d7ecd9095cb..250dd2cb5a2 100644 --- a/mysql-test/t/ctype_ldml-master.opt +++ b/mysql-test/t/ctype_ldml-master.opt @@ -1,2 +1,2 @@ ---character-sets-dir=$MYSQL_TEST_DIR/std_data/ +--character-sets-dir=$MYSQL_TEST_DIR/std_data/ldml/ --log-error=$MYSQLTEST_VARDIR/tmp/ctype_ldml_log.err diff --git a/mysql-test/t/ctype_ldml.test b/mysql-test/t/ctype_ldml.test index 1ea8002a2eb..37efc775b27 100644 --- a/mysql-test/t/ctype_ldml.test +++ b/mysql-test/t/ctype_ldml.test @@ -409,3 +409,32 @@ INSERT INTO t1 VALUES (_ucs2 0x3400),(_ucs2 0x3560),(_ucs2 0x3561),(_ucs2 0x3600 INSERT INTO t1 VALUES (_ucs2 0x3700),(_ucs2 0x3701); SELECT HEX(CONVERT(a USING ucs2)) AS ch, HEX(WEIGHT_STRING(a)) AS w, HEX(WEIGHT_STRING(a COLLATE utf8_unicode_ci)) AS ducet FROM t1 ORDER BY a,ch; DROP TABLE t1; + + +--echo # +--echo # Testing that the MY_CS_PUREASCII flag is set properly +--echo # +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2, b VARCHAR(10) CHARACTER SET latin1); +INSERT INTO t1 VALUES ('a','a'),('b','b'); +# should not give "illegal collation" error +SELECT * FROM t1 WHERE a=b; +ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin; +# should not give "illegal collation" error +SELECT * FROM t1 WHERE a=b; +DROP TABLE t1; + + +--echo # +--echo # Testing that the MY_CS_CSSORT flag is set properly +--echo # +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test); +INSERT INTO t1 VALUES ('a'),('A'); +# should be case insensitive +SELECT * FROM t1 WHERE a RLIKE 'a'; +DROP TABLE t1; + +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test2_cs); +INSERT INTO t1 VALUES ('a'),('A'); + # should be case sensitive +SELECT * FROM t1 WHERE a RLIKE 'a'; +DROP TABLE t1; diff --git a/mysys/charset.c b/mysys/charset.c index e46fd16a5fb..2a96ec0070d 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -297,7 +297,6 @@ static int add_collation(struct charset_info_st *cs) } else { - const uchar *sort_order= newcs->sort_order; simple_cs_init_functions(newcs); newcs->mbminlen= 1; newcs->mbmaxlen= 1; @@ -307,21 +306,6 @@ static int add_collation(struct charset_info_st *cs) newcs->state |= MY_CS_LOADED; } newcs->state|= MY_CS_AVAILABLE; - - /* - Check if case sensitive sort order: A < a < B. - We need MY_CS_FLAG for regex library, and for - case sensitivity flag for 5.0 client protocol, - to support isCaseSensitive() method in JDBC driver - */ - if (sort_order && sort_order['A'] < sort_order['a'] && - sort_order['a'] < sort_order['B']) - newcs->state|= MY_CS_CSSORT; - - if (my_charset_is_8bit_pure_ascii(newcs)) - newcs->state|= MY_CS_PUREASCII; - if (!my_charset_is_ascii_compatible(cs)) - newcs->state|= MY_CS_NONASCII; } } else diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c index 5b9793f388d..31093fe4230 100644 --- a/strings/conf_to_src.c +++ b/strings/conf_to_src.c @@ -193,25 +193,19 @@ static int my_read_charset_file(const char *filename) return FALSE; } -static int -is_case_sensitive(CHARSET_INFO *cs) -{ - return (cs->sort_order && - cs->sort_order['A'] < cs->sort_order['a'] && - cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0; -} - void dispcset(FILE *f,CHARSET_INFO *cs) { + uint flags= my_8bit_charset_flags_from_data(cs) | + my_8bit_collation_flags_from_data(cs); fprintf(f,"{\n"); fprintf(f," %d,%d,%d,\n",cs->number,0,0); fprintf(f," MY_CS_COMPILED%s%s%s%s%s,\n", cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "", cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "", - is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "", - my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "", - !my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": ""); + flags & MY_CS_CSSORT ? "|MY_CS_CSSORT" : "", + flags & MY_CS_PUREASCII ? "|MY_CS_PUREASCII" : "", + flags & MY_CS_NONASCII ? "|MY_CS_NONASCII" : ""); if (cs->name) { diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index f405c4f327b..e6cea06b17b 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -1340,9 +1340,80 @@ create_fromuni(struct charset_info_st *cs, return FALSE; } + +/* + Detect if a character set is 8bit, + and it is pure ascii, i.e. doesn't have + characters outside U+0000..U+007F + This functions is shared between "conf_to_src" + and dynamic charsets loader in "mysqld". +*/ +static my_bool +my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs) +{ + size_t code; + if (!cs->tab_to_uni) + return 0; + for (code= 0; code < 256; code++) + { + if (cs->tab_to_uni[code] > 0x7F) + return 0; + } + return 1; +} + + +/* + Shared function between conf_to_src and mysys. + Check if a 8bit character set is compatible with + ascii on the range 0x00..0x7F. +*/ +static my_bool +my_charset_is_ascii_compatible(CHARSET_INFO *cs) +{ + uint i; + if (!cs->tab_to_uni) + return 1; + for (i= 0; i < 128; i++) + { + if (cs->tab_to_uni[i] != i) + return 0; + } + return 1; +} + + +uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs) +{ + uint flags= 0; + if (my_charset_is_8bit_pure_ascii(cs)) + flags|= MY_CS_PUREASCII; + if (!my_charset_is_ascii_compatible(cs)) + flags|= MY_CS_NONASCII; + return flags; +} + + +/* + Check if case sensitive sort order: A < a < B. + We need MY_CS_FLAG for regex library, and for + case sensitivity flag for 5.0 client protocol, + to support isCaseSensitive() method in JDBC driver +*/ +uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs) +{ + uint flags= 0; + if (cs->sort_order && cs->sort_order['A'] < cs->sort_order['a'] && + cs->sort_order['a'] < cs->sort_order['B']) + flags|= MY_CS_CSSORT; + return flags; +} + + static my_bool my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader) { + cs->state|= my_8bit_charset_flags_from_data(cs); cs->caseup_multiply= 1; cs->casedn_multiply= 1; cs->pad_char= ' '; @@ -1371,6 +1442,7 @@ static void set_max_sort_char(struct charset_info_st *cs) static my_bool my_coll_init_simple(struct charset_info_st *cs, MY_CHARSET_LOADER *loader __attribute__((unused))) { + cs->state|= my_8bit_collation_flags_from_data(cs); set_max_sort_char(cs); return FALSE; } diff --git a/strings/ctype.c b/strings/ctype.c index 620c7e13503..be8a8cb506e 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -974,48 +974,6 @@ my_charset_is_ascii_based(CHARSET_INFO *cs) /* - Detect if a character set is 8bit, - and it is pure ascii, i.e. doesn't have - characters outside U+0000..U+007F - This functions is shared between "conf_to_src" - and dynamic charsets loader in "mysqld". -*/ -my_bool -my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs) -{ - size_t code; - if (!cs->tab_to_uni) - return 0; - for (code= 0; code < 256; code++) - { - if (cs->tab_to_uni[code] > 0x7F) - return 0; - } - return 1; -} - - -/* - Shared function between conf_to_src and mysys. - Check if a 8bit character set is compatible with - ascii on the range 0x00..0x7F. -*/ -my_bool -my_charset_is_ascii_compatible(CHARSET_INFO *cs) -{ - uint i; - if (!cs->tab_to_uni) - return 1; - for (i= 0; i < 128; i++) - { - if (cs->tab_to_uni[i] != i) - return 0; - } - return 1; -} - - -/* Convert a string between two character sets. 'to' must be large enough to store (form_length * to_cs->mbmaxlen) bytes. diff --git a/strings/strings_def.h b/strings/strings_def.h index fb280b6bb6b..36d3d2b2fe9 100644 --- a/strings/strings_def.h +++ b/strings/strings_def.h @@ -101,6 +101,11 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len) return (end); } + +uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs); +uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs); + + /* Macros for hashing characters */ #define MY_HASH_ADD(A, B, value) \ |