diff options
author | Alexander Barkov <bar@mariadb.com> | 2019-06-28 09:05:12 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2019-06-28 12:37:04 +0400 |
commit | 3e7e87ddccf71ba5328c257b7d642d6803fbca3a (patch) | |
tree | 0ea22d4c533b84b2675c3d7c7555f5ad367222ac /strings | |
parent | 323a87b591d3e9bcedb7dea09b69d9eeb1c42880 (diff) | |
download | mariadb-git-3e7e87ddccf71ba5328c257b7d642d6803fbca3a.tar.gz |
MDEV-19897 Rename source code variable names from utf8 to utf8mb3
Diffstat (limited to 'strings')
-rw-r--r-- | strings/CHARSET_INFO.txt | 2 | ||||
-rw-r--r-- | strings/ctype-mb.ic | 2 | ||||
-rw-r--r-- | strings/ctype-uca.c | 526 | ||||
-rw-r--r-- | strings/ctype-uca.ic | 2 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 414 | ||||
-rw-r--r-- | strings/ctype.c | 2 | ||||
-rw-r--r-- | strings/strcoll.ic | 8 |
7 files changed, 479 insertions, 477 deletions
diff --git a/strings/CHARSET_INFO.txt b/strings/CHARSET_INFO.txt index 6f0a810be37..922a372495b 100644 --- a/strings/CHARSET_INFO.txt +++ b/strings/CHARSET_INFO.txt @@ -129,7 +129,7 @@ In all Asian charsets these arrays are set up as follows: In Unicode character sets we have full support of UPPER/LOWER mapping, for sorting order, and for character type detection. -"utf8_general_ci" still has the "old-fashioned" arrays +"utf8mb3_general_ci" still has the "old-fashioned" arrays like to_upper, to_lower, sort_order and ctype, but they are not really used (maybe only in some rare legacy functions). diff --git a/strings/ctype-mb.ic b/strings/ctype-mb.ic index 336c482d24f..6cde31a34ad 100644 --- a/strings/ctype-mb.ic +++ b/strings/ctype-mb.ic @@ -167,7 +167,7 @@ MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused /** Returns well formed length of a string measured in characters (rather than in bytes). - Version for character sets that define CHARLEN(), e.g. utf8. + Version for character sets that define CHARLEN(), e.g. utf8mb3. CHARLEN(cs,b,e) must use the same return code convension that mb_wc() does: - a positive number in the range [1-mbmaxlen] if a valid single-byte or multi-byte character was found diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 312b903ea64..99a0d0f46ae 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -32312,7 +32312,7 @@ static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem) /* Escaped character, e.g. \u1234 */ if ((*beg == '\\') && (beg + 2 < lexem->end) && - (beg[1] == 'u') && my_isxdigit(&my_charset_utf8_general_ci, beg[2])) + (beg[1] == 'u') && my_isxdigit(&my_charset_utf8mb3_general_ci, beg[2])) { int ch; @@ -32341,7 +32341,7 @@ static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem) if (((uchar) *beg) > 0x7F) /* Unescaped multibyte character */ { - CHARSET_INFO *cs= &my_charset_utf8_general_ci; + CHARSET_INFO *cs= &my_charset_utf8mb3_general_ci; my_wc_t wc; int nbytes= cs->cset->mb_wc(cs, &wc, (uchar *) beg, (uchar *) lexem->end); @@ -33720,7 +33720,7 @@ static my_bool my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader) { cs->pad_char= ' '; - cs->ctype= my_charset_utf8_unicode_ci.ctype; + cs->ctype= my_charset_utf8mb3_unicode_ci.ctype; if (!cs->caseinfo) cs->caseinfo= &my_unicase_default; return create_tailoring(cs, loader); @@ -33894,7 +33894,7 @@ struct charset_info_st my_charset_ucs2_unicode_ci= { 128,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_unicode_ci", /* name */ "", /* comment */ "", /* tailoring */ @@ -33926,7 +33926,7 @@ struct charset_info_st my_charset_ucs2_icelandic_uca_ci= { 129,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_icelandic_ci",/* name */ "", /* comment */ icelandic, /* tailoring */ @@ -33958,7 +33958,7 @@ struct charset_info_st my_charset_ucs2_latvian_uca_ci= { 130,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_latvian_ci", /* name */ "", /* comment */ latvian, /* tailoring */ @@ -33990,7 +33990,7 @@ struct charset_info_st my_charset_ucs2_romanian_uca_ci= { 131,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_romanian_ci", /* name */ "", /* comment */ romanian, /* tailoring */ @@ -34022,7 +34022,7 @@ struct charset_info_st my_charset_ucs2_slovenian_uca_ci= { 132,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_slovenian_ci",/* name */ "", /* comment */ slovenian, /* tailoring */ @@ -34054,7 +34054,7 @@ struct charset_info_st my_charset_ucs2_polish_uca_ci= { 133,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_polish_ci", /* name */ "", /* comment */ polish, /* tailoring */ @@ -34086,7 +34086,7 @@ struct charset_info_st my_charset_ucs2_estonian_uca_ci= { 134,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_estonian_ci", /* name */ "", /* comment */ estonian, /* tailoring */ @@ -34118,7 +34118,7 @@ struct charset_info_st my_charset_ucs2_spanish_uca_ci= { 135,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_spanish_ci", /* name */ "", /* comment */ spanish, /* tailoring */ @@ -34150,7 +34150,7 @@ struct charset_info_st my_charset_ucs2_swedish_uca_ci= { 136,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_swedish_ci", /* name */ "", /* comment */ swedish, /* tailoring */ @@ -34182,7 +34182,7 @@ struct charset_info_st my_charset_ucs2_turkish_uca_ci= { 137,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_turkish_ci", /* name */ "", /* comment */ turkish, /* tailoring */ @@ -34214,7 +34214,7 @@ struct charset_info_st my_charset_ucs2_czech_uca_ci= { 138,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_czech_ci", /* name */ "", /* comment */ czech, /* tailoring */ @@ -34247,7 +34247,7 @@ struct charset_info_st my_charset_ucs2_danish_uca_ci= { 139,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_danish_ci", /* name */ "", /* comment */ danish, /* tailoring */ @@ -34279,8 +34279,8 @@ struct charset_info_st my_charset_ucs2_lithuanian_uca_ci= { 140,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ - "ucs2_lithuanian_ci",/* name */ + "ucs2", /* cs name */ + "ucs2_lithuanian_ci",/* name */ "", /* comment */ lithuanian, /* tailoring */ NULL, /* ctype */ @@ -34311,7 +34311,7 @@ struct charset_info_st my_charset_ucs2_slovak_uca_ci= { 141,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_slovak_ci", /* name */ "", /* comment */ slovak, /* tailoring */ @@ -34343,7 +34343,7 @@ struct charset_info_st my_charset_ucs2_spanish2_uca_ci= { 142,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_spanish2_ci", /* name */ "", /* comment */ spanish2, /* tailoring */ @@ -34376,7 +34376,7 @@ struct charset_info_st my_charset_ucs2_roman_uca_ci= { 143,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_roman_ci", /* name */ "", /* comment */ roman, /* tailoring */ @@ -34409,7 +34409,7 @@ struct charset_info_st my_charset_ucs2_persian_uca_ci= { 144,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_persian_ci", /* name */ "", /* comment */ persian, /* tailoring */ @@ -34442,7 +34442,7 @@ struct charset_info_st my_charset_ucs2_esperanto_uca_ci= { 145,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_esperanto_ci",/* name */ "", /* comment */ esperanto, /* tailoring */ @@ -34475,7 +34475,7 @@ struct charset_info_st my_charset_ucs2_hungarian_uca_ci= { 146,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_hungarian_ci",/* name */ "", /* comment */ hungarian, /* tailoring */ @@ -34506,8 +34506,8 @@ struct charset_info_st my_charset_ucs2_hungarian_uca_ci= struct charset_info_st my_charset_ucs2_sinhala_uca_ci= { 147,0,0, /* number */ - MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* csname */ + MY_CS_UCS2_UCA_FLAGS,/* state */ + "ucs2", /* csname */ "ucs2_sinhala_ci", /* name */ "", /* comment */ sinhala, /* tailoring */ @@ -34540,8 +34540,8 @@ struct charset_info_st my_charset_ucs2_sinhala_uca_ci= struct charset_info_st my_charset_ucs2_german2_uca_ci= { 148,0,0, /* number */ - MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* csname */ + MY_CS_UCS2_UCA_FLAGS,/* state */ + "ucs2", /* csname */ "ucs2_german2_ci", /* name */ "", /* comment */ german2, /* tailoring */ @@ -34572,9 +34572,9 @@ struct charset_info_st my_charset_ucs2_german2_uca_ci= struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci= { 149,0,0, /* number */ - MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ - "ucs2_croatian_mysql561_ci", /* name */ + MY_CS_UCS2_UCA_FLAGS,/* state */ + "ucs2", /* cs name */ + "ucs2_croatian_mysql561_ci",/* name */ "", /* comment */ croatian_mysql561, /* tailoring */ NULL, /* ctype */ @@ -34605,9 +34605,9 @@ struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci= struct charset_info_st my_charset_ucs2_croatian_uca_ci= { MY_PAGE2_COLLATION_ID_UCS2,0,0, /* number */ - MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ - "ucs2_croatian_ci", /* name */ + MY_CS_UCS2_UCA_FLAGS,/* state */ + "ucs2", /* cs name */ + "ucs2_croatian_ci", /* name */ "", /* comment */ croatian_mariadb, /* tailoring */ NULL, /* ctype */ @@ -34638,9 +34638,9 @@ struct charset_info_st my_charset_ucs2_croatian_uca_ci= struct charset_info_st my_charset_ucs2_myanmar_uca_ci= { MY_PAGE2_COLLATION_ID_UCS2+1,0,0, /* number */ - MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ - "ucs2_myanmar_ci", /* name */ + MY_CS_UCS2_UCA_FLAGS,/* state */ + "ucs2", /* cs name */ + "ucs2_myanmar_ci", /* name */ "", /* comment */ myanmar, /* tailoring */ NULL, /* ctype */ @@ -34736,7 +34736,7 @@ struct charset_info_st my_charset_ucs2_unicode_520_ci= struct charset_info_st my_charset_ucs2_vietnamese_ci= { 151,0,0, /* number */ - MY_CS_UCS2_UCA_FLAGS,/* state */ + MY_CS_UCS2_UCA_FLAGS,/* state */ "ucs2", /* csname */ "ucs2_vietnamese_ci",/* name */ "", /* comment */ @@ -34835,7 +34835,7 @@ struct charset_info_st my_charset_ucs2_unicode_520_nopad_ci= #endif -#ifdef HAVE_CHARSET_utf8 +#ifdef HAVE_CHARSET_utf8mb3 static my_bool my_uca_coll_init_utf8mb3(struct charset_info_st *cs, MY_CHARSET_LOADER *loader); @@ -34876,7 +34876,7 @@ my_uca_coll_init_utf8mb3(struct charset_info_st *cs, MY_CHARSET_LOADER *loader) expressions. Note, there is no need to mark byte 255 as a letter, it is illegal byte in UTF8. */ -static uchar ctype_utf8[] = { +static uchar ctype_utf8mb3[] = { 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, @@ -34896,20 +34896,20 @@ static uchar ctype_utf8[] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0 }; -extern MY_CHARSET_HANDLER my_charset_utf8_handler; +extern MY_CHARSET_HANDLER my_charset_utf8mb3_handler; #define MY_CS_UTF8MB3_UCA_FLAGS MY_CS_COMMON_UCA_FLAGS #define MY_CS_UTF8MB3_UCA_NOPAD_FLAGS (MY_CS_UTF8MB3_UCA_FLAGS|MY_CS_NOPAD) -struct charset_info_st my_charset_utf8_unicode_ci= +struct charset_info_st my_charset_utf8mb3_unicode_ci= { 192,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_unicode_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_unicode_ci", /* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -34929,20 +34929,20 @@ struct charset_info_st my_charset_utf8_unicode_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_icelandic_uca_ci= +struct charset_info_st my_charset_utf8mb3_icelandic_uca_ci= { 193,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_icelandic_ci",/* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_icelandic_ci",/* name */ "", /* comment */ icelandic, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -34962,19 +34962,19 @@ struct charset_info_st my_charset_utf8_icelandic_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_latvian_uca_ci= +struct charset_info_st my_charset_utf8mb3_latvian_uca_ci= { 194,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_latvian_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_latvian_ci",/* name */ "", /* comment */ latvian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -34994,19 +34994,19 @@ struct charset_info_st my_charset_utf8_latvian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_romanian_uca_ci= +struct charset_info_st my_charset_utf8mb3_romanian_uca_ci= { 195,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_romanian_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_romanian_ci", /* name */ "", /* comment */ romanian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35026,19 +35026,19 @@ struct charset_info_st my_charset_utf8_romanian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_slovenian_uca_ci= +struct charset_info_st my_charset_utf8mb3_slovenian_uca_ci= { 196,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_slovenian_ci",/* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_slovenian_ci",/* name */ "", /* comment */ slovenian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35058,19 +35058,19 @@ struct charset_info_st my_charset_utf8_slovenian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_polish_uca_ci= +struct charset_info_st my_charset_utf8mb3_polish_uca_ci= { 197,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_polish_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_polish_ci",/* name */ "", /* comment */ polish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35090,19 +35090,19 @@ struct charset_info_st my_charset_utf8_polish_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_estonian_uca_ci= +struct charset_info_st my_charset_utf8mb3_estonian_uca_ci= { 198,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_estonian_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_estonian_ci",/* name */ "", /* comment */ estonian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35122,19 +35122,19 @@ struct charset_info_st my_charset_utf8_estonian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_spanish_uca_ci= +struct charset_info_st my_charset_utf8mb3_spanish_uca_ci= { 199,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_spanish_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_spanish_ci", /* name */ "", /* comment */ spanish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35154,19 +35154,19 @@ struct charset_info_st my_charset_utf8_spanish_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_swedish_uca_ci= +struct charset_info_st my_charset_utf8mb3_swedish_uca_ci= { 200,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_swedish_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_swedish_ci", /* name */ "", /* comment */ swedish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35186,19 +35186,19 @@ struct charset_info_st my_charset_utf8_swedish_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_turkish_uca_ci= +struct charset_info_st my_charset_utf8mb3_turkish_uca_ci= { 201,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_turkish_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_turkish_ci", /* name */ "", /* comment */ turkish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35218,19 +35218,19 @@ struct charset_info_st my_charset_utf8_turkish_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_czech_uca_ci= +struct charset_info_st my_charset_utf8mb3_czech_uca_ci= { 202,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_czech_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_czech_ci", /* name */ "", /* comment */ czech, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35250,20 +35250,20 @@ struct charset_info_st my_charset_utf8_czech_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_danish_uca_ci= +struct charset_info_st my_charset_utf8mb3_danish_uca_ci= { 203,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_danish_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_danish_ci", /* name */ "", /* comment */ danish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35283,19 +35283,19 @@ struct charset_info_st my_charset_utf8_danish_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_lithuanian_uca_ci= +struct charset_info_st my_charset_utf8mb3_lithuanian_uca_ci= { 204,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_lithuanian_ci",/* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_lithuanian_ci",/* name */ "", /* comment */ lithuanian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35315,19 +35315,19 @@ struct charset_info_st my_charset_utf8_lithuanian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_slovak_uca_ci= +struct charset_info_st my_charset_utf8mb3_slovak_uca_ci= { 205,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_slovak_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_slovak_ci",/* name */ "", /* comment */ slovak, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35347,19 +35347,19 @@ struct charset_info_st my_charset_utf8_slovak_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_spanish2_uca_ci= +struct charset_info_st my_charset_utf8mb3_spanish2_uca_ci= { 206,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_spanish2_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_spanish2_ci",/* name */ "", /* comment */ spanish2, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35379,19 +35379,19 @@ struct charset_info_st my_charset_utf8_spanish2_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_roman_uca_ci= +struct charset_info_st my_charset_utf8mb3_roman_uca_ci= { 207,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_roman_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_roman_ci",/* name */ "", /* comment */ roman, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35411,19 +35411,19 @@ struct charset_info_st my_charset_utf8_roman_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_persian_uca_ci= +struct charset_info_st my_charset_utf8mb3_persian_uca_ci= { 208,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_persian_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_persian_ci",/* name */ "", /* comment */ persian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35443,19 +35443,19 @@ struct charset_info_st my_charset_utf8_persian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_esperanto_uca_ci= +struct charset_info_st my_charset_utf8mb3_esperanto_uca_ci= { 209,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_esperanto_ci",/* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_esperanto_ci",/* name */ "", /* comment */ esperanto, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35475,19 +35475,19 @@ struct charset_info_st my_charset_utf8_esperanto_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_hungarian_uca_ci= +struct charset_info_st my_charset_utf8mb3_hungarian_uca_ci= { 210,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_hungarian_ci",/* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_hungarian_ci",/* name */ "", /* comment */ hungarian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35507,19 +35507,19 @@ struct charset_info_st my_charset_utf8_hungarian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_sinhala_uca_ci= +struct charset_info_st my_charset_utf8mb3_sinhala_uca_ci= { 211,0,0, /* number */ - MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_sinhala_ci", /* name */ + MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_sinhala_ci", /* name */ "", /* comment */ sinhala, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35539,20 +35539,20 @@ struct charset_info_st my_charset_utf8_sinhala_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_german2_uca_ci= +struct charset_info_st my_charset_utf8mb3_german2_uca_ci= { 212,0,0, /* number */ - MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ + MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ MY_UTF8MB3, /* cs name */ - MY_UTF8MB3 "_german2_ci",/* name */ + MY_UTF8MB3 "_german2_ci",/* name */ "", /* comment */ german2, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35572,19 +35572,19 @@ struct charset_info_st my_charset_utf8_german2_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci= +struct charset_info_st my_charset_utf8mb3_croatian_mysql561_uca_ci= { 213,0,0, /* number */ - MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ + MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ MY_UTF8MB3, /* cs name */ MY_UTF8MB3 "_croatian_mysql561_ci",/* name */ "", /* comment */ croatian_mysql561, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35604,20 +35604,20 @@ struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_croatian_uca_ci= +struct charset_info_st my_charset_utf8mb3_croatian_uca_ci= { MY_PAGE2_COLLATION_ID_UTF8,0,0, /* number */ - MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ + MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ MY_UTF8MB3, /* cs name */ MY_UTF8MB3 "_croatian_ci",/* name */ "", /* comment */ croatian_mariadb, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35637,20 +35637,20 @@ struct charset_info_st my_charset_utf8_croatian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_myanmar_uca_ci= +struct charset_info_st my_charset_utf8mb3_myanmar_uca_ci= { MY_PAGE2_COLLATION_ID_UTF8+1,0,0, /* number */ - MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ + MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ MY_UTF8MB3, /* cs name */ - MY_UTF8MB3 "_myanmar_ci",/* name */ + MY_UTF8MB3 "_myanmar_ci",/* name */ "", /* comment */ myanmar, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35670,12 +35670,12 @@ struct charset_info_st my_charset_utf8_myanmar_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_unicode_520_ci= +struct charset_info_st my_charset_utf8mb3_unicode_520_ci= { 214,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ @@ -35683,7 +35683,7 @@ struct charset_info_st my_charset_utf8_unicode_520_ci= MY_UTF8MB3 "_unicode_520_ci",/* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35703,11 +35703,11 @@ struct charset_info_st my_charset_utf8_unicode_520_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_thai_520_w2= +struct charset_info_st my_charset_utf8mb3_thai_520_w2= { MY_PAGE2_COLLATION_ID_UTF8+2,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ @@ -35715,7 +35715,7 @@ struct charset_info_st my_charset_utf8_thai_520_w2= MY_UTF8MB3 "_thai_520_w2",/* name */ "", /* comment */ "[strength 2]", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35735,11 +35735,11 @@ struct charset_info_st my_charset_utf8_thai_520_w2= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 2, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_multilevel_utf8mb3 }; -struct charset_info_st my_charset_utf8_vietnamese_ci= +struct charset_info_st my_charset_utf8mb3_vietnamese_ci= { 215,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ @@ -35747,7 +35747,7 @@ struct charset_info_st my_charset_utf8_vietnamese_ci= MY_UTF8MB3 "_vietnamese_ci",/* name */ "", /* comment */ vietnamese, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35767,12 +35767,12 @@ struct charset_info_st my_charset_utf8_vietnamese_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_unicode_nopad_ci= +struct charset_info_st my_charset_utf8mb3_unicode_nopad_ci= { MY_NOPAD_ID(192),0,0, /* number */ MY_CS_UTF8MB3_UCA_NOPAD_FLAGS, /* flags */ @@ -35780,7 +35780,7 @@ struct charset_info_st my_charset_utf8_unicode_nopad_ci= MY_UTF8MB3 "_unicode_nopad_ci",/* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35800,12 +35800,12 @@ struct charset_info_st my_charset_utf8_unicode_nopad_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_nopad_utf8mb3 }; -struct charset_info_st my_charset_utf8_unicode_520_nopad_ci= +struct charset_info_st my_charset_utf8mb3_unicode_520_nopad_ci= { MY_NOPAD_ID(214),0,0, /* number */ MY_CS_UTF8MB3_UCA_NOPAD_FLAGS, /* flags */ @@ -35813,7 +35813,7 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci= MY_UTF8MB3 "_unicode_520_nopad_ci", /* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35833,11 +35833,11 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_nopad_utf8mb3 }; -#endif /* HAVE_CHARSET_utf8 */ +#endif /* HAVE_CHARSET_utf8mb3 */ #ifdef HAVE_CHARSET_utf8mb4 @@ -35883,12 +35883,12 @@ extern MY_CHARSET_HANDLER my_charset_utf8mb4_handler; struct charset_info_st my_charset_utf8mb4_unicode_ci= { 224,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_unicode_ci",/* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_unicode_ci",/* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35916,12 +35916,12 @@ struct charset_info_st my_charset_utf8mb4_unicode_ci= struct charset_info_st my_charset_utf8mb4_icelandic_uca_ci= { 225,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_icelandic_ci",/* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_icelandic_ci",/* name */ "", /* comment */ icelandic, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35953,7 +35953,7 @@ struct charset_info_st my_charset_utf8mb4_latvian_uca_ci= MY_UTF8MB4 "_latvian_ci", /* name */ "", /* comment */ latvian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35980,12 +35980,12 @@ struct charset_info_st my_charset_utf8mb4_latvian_uca_ci= struct charset_info_st my_charset_utf8mb4_romanian_uca_ci= { 227,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_romanian_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_romanian_ci", /* name */ "", /* comment */ romanian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36012,12 +36012,12 @@ struct charset_info_st my_charset_utf8mb4_romanian_uca_ci= struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci= { 228,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_slovenian_ci",/* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_slovenian_ci",/* name */ "", /* comment */ slovenian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36044,12 +36044,12 @@ struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci= struct charset_info_st my_charset_utf8mb4_polish_uca_ci= { 229,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_polish_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_polish_ci", /* name */ "", /* comment */ polish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36081,7 +36081,7 @@ struct charset_info_st my_charset_utf8mb4_estonian_uca_ci= MY_UTF8MB4 "_estonian_ci", /* name */ "", /* comment */ estonian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36108,12 +36108,12 @@ struct charset_info_st my_charset_utf8mb4_estonian_uca_ci= struct charset_info_st my_charset_utf8mb4_spanish_uca_ci= { 231,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_spanish_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_spanish_ci", /* name */ "", /* comment */ spanish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36140,12 +36140,12 @@ struct charset_info_st my_charset_utf8mb4_spanish_uca_ci= struct charset_info_st my_charset_utf8mb4_swedish_uca_ci= { 232,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_swedish_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_swedish_ci", /* name */ "", /* comment */ swedish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36172,12 +36172,12 @@ struct charset_info_st my_charset_utf8mb4_swedish_uca_ci= struct charset_info_st my_charset_utf8mb4_turkish_uca_ci= { 233,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_turkish_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_turkish_ci", /* name */ "", /* comment */ turkish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36204,12 +36204,12 @@ struct charset_info_st my_charset_utf8mb4_turkish_uca_ci= struct charset_info_st my_charset_utf8mb4_czech_uca_ci= { 234,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_czech_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_czech_ci", /* name */ "", /* comment */ czech, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36237,12 +36237,12 @@ struct charset_info_st my_charset_utf8mb4_czech_uca_ci= struct charset_info_st my_charset_utf8mb4_danish_uca_ci= { 235,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_danish_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_danish_ci", /* name */ "", /* comment */ danish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36269,12 +36269,12 @@ struct charset_info_st my_charset_utf8mb4_danish_uca_ci= struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci= { 236,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_lithuanian_ci",/* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_lithuanian_ci",/* name */ "", /* comment */ lithuanian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36301,12 +36301,12 @@ struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci= struct charset_info_st my_charset_utf8mb4_slovak_uca_ci= { 237,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_slovak_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_slovak_ci", /* name */ "", /* comment */ slovak, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36333,12 +36333,12 @@ struct charset_info_st my_charset_utf8mb4_slovak_uca_ci= struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci= { 238,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_spanish2_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_spanish2_ci", /* name */ "", /* comment */ spanish2, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36365,12 +36365,12 @@ struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci= struct charset_info_st my_charset_utf8mb4_roman_uca_ci= { 239,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_roman_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_roman_ci", /* name */ "", /* comment */ roman, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36397,12 +36397,12 @@ struct charset_info_st my_charset_utf8mb4_roman_uca_ci= struct charset_info_st my_charset_utf8mb4_persian_uca_ci= { 240,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_persian_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_persian_ci", /* name */ "", /* comment */ persian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36429,12 +36429,12 @@ struct charset_info_st my_charset_utf8mb4_persian_uca_ci= struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci= { 241,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_esperanto_ci",/* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_esperanto_ci",/* name */ "", /* comment */ esperanto, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36461,12 +36461,12 @@ struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci= struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci= { 242,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_hungarian_ci",/* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_hungarian_ci",/* name */ "", /* comment */ hungarian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36494,11 +36494,11 @@ struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci= { 243,0,0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_sinhala_ci",/* name */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_sinhala_ci",/* name */ "", /* comment */ sinhala, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36526,11 +36526,11 @@ struct charset_info_st my_charset_utf8mb4_german2_uca_ci= { 244,0,0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_german2_ci",/* name */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_german2_ci",/* name */ "", /* comment */ german2, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36562,7 +36562,7 @@ struct charset_info_st my_charset_utf8mb4_croatian_mysql561_uca_ci= MY_UTF8MB4 "_croatian_mysql561_ci",/* name */ "", /* comment */ croatian_mysql561, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36591,11 +36591,11 @@ struct charset_info_st my_charset_utf8mb4_croatian_uca_ci= { MY_PAGE2_COLLATION_ID_UTF8MB4,0,0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ + MY_UTF8MB4, /* csname */ MY_UTF8MB4 "_croatian_ci",/* name */ "", /* comment */ croatian_mariadb, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36624,11 +36624,11 @@ struct charset_info_st my_charset_utf8mb4_myanmar_uca_ci= { MY_PAGE2_COLLATION_ID_UTF8MB4+1,0,0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_myanmar_ci",/* name */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_myanmar_ci",/* name */ "", /* comment */ myanmar, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36660,7 +36660,7 @@ struct charset_info_st my_charset_utf8mb4_thai_520_w2= MY_UTF8MB4 "_thai_520_w2", /* name */ "", /* comment */ "[strength 2]", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36692,7 +36692,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_ci= MY_UTF8MB4 "_unicode_520_ci",/* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36725,7 +36725,7 @@ struct charset_info_st my_charset_utf8mb4_vietnamese_ci= MY_UTF8MB4 "_vietnamese_ci",/* name */ "", /* comment */ vietnamese, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36758,7 +36758,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_nopad_ci= MY_UTF8MB4 "_unicode_nopad_ci", /* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36791,7 +36791,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci= MY_UTF8MB4 "_unicode_520_nopad_ci", /* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -37482,7 +37482,7 @@ struct charset_info_st my_charset_utf32_german2_uca_ci= { 180,0,0, /* number */ MY_CS_UTF32_UCA_FLAGS,/* state */ - "utf32", /* csname */ + "utf32", /* csname */ "utf32_german2_ci", /* name */ "", /* comment */ german2, /* tailoring */ diff --git a/strings/ctype-uca.ic b/strings/ctype-uca.ic index 70c10199e3e..b7108eb7f9d 100644 --- a/strings/ctype-uca.ic +++ b/strings/ctype-uca.ic @@ -432,7 +432,7 @@ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs, This functions is used for one-level and for multi-level collations. We intentionally use only primary level in multi-level collations. This helps to have PARTITION BY KEY put primarily equal records - into the same partition. E.g. in utf8_thai_520_ci records that differ + into the same partition. E.g. in utf8mb3_thai_520_ci records that differ only in tone marks go into the same partition. RETURN diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 01c549a7eaa..bef6d198e22 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -49,11 +49,11 @@ -#ifndef HAVE_CHARSET_utf8 -#define HAVE_CHARSET_utf8 +#ifndef HAVE_CHARSET_utf8mb3 +#define HAVE_CHARSET_utf8mb3 #endif -#ifdef HAVE_CHARSET_utf8 +#ifdef HAVE_CHARSET_utf8mb3 #define HAVE_UNIDATA #endif @@ -70,7 +70,7 @@ #endif -#if defined(HAVE_CHARSET_utf8) || defined(HAVE_CHARSET_utf8mb4) +#if defined(HAVE_CHARSET_utf8mb3) || defined(HAVE_CHARSET_utf8mb4) static inline int my_valid_mbcharlen_utf8mb3(const uchar *s, const uchar *e) @@ -106,7 +106,7 @@ int my_valid_mbcharlen_utf8mb3(const uchar *s, const uchar *e) return 3; } -#endif /*HAVE_CHARSET_utf8 || HAVE_CHARSET_utf8mb4*/ +#endif /*HAVE_CHARSET_utf8mb3 || HAVE_CHARSET_utf8mb4*/ #ifdef HAVE_UNIDATA @@ -1737,7 +1737,7 @@ MY_UNICASE_INFO my_unicase_default= /* - Reproduce old utf8_general_ci behaviour before we fixed Bug#27877. + Reproduce old utf8mb3_general_ci behaviour before we fixed Bug#27877. */ MY_UNICASE_CHARACTER *my_unicase_pages_mysql500[256]={ plane00_mysql500, @@ -4750,7 +4750,7 @@ my_strnxfrmlen_unicode_full_bin(CHARSET_INFO *cs, size_t len) #endif /* HAVE_UNIDATA */ -#ifdef HAVE_CHARSET_utf8 +#ifdef HAVE_CHARSET_utf8mb3 /* We consider bytes with code more than 127 as a letter. @@ -4758,7 +4758,7 @@ my_strnxfrmlen_unicode_full_bin(CHARSET_INFO *cs, size_t len) expressions. Note, there is no need to mark byte 255 as a letter, it is illegal byte in UTF8. */ -static const uchar ctype_utf8[] = { +static const uchar ctype_utf8mb3[] = { 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, @@ -4780,7 +4780,7 @@ static const uchar ctype_utf8[] = { /* The below are taken from usa7 implementation */ -static const uchar to_lower_utf8[] = { +static const uchar to_lower_utf8mb3[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, @@ -4799,7 +4799,7 @@ static const uchar to_lower_utf8[] = { 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 }; -static const uchar to_upper_utf8[] = { +static const uchar to_upper_utf8mb3[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, @@ -4818,8 +4818,8 @@ static const uchar to_upper_utf8[] = { 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 }; -static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)), - my_wc_t * pwc, const uchar *s, const uchar *e) +static int my_utf8mb3_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t * pwc, const uchar *s, const uchar *e) { return my_mb_wc_utf8mb3_quick(pwc, s, e); } @@ -4829,8 +4829,8 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)), The same as above, but without range check for example, for a null-terminated string */ -static int my_utf8_uni_no_range(CHARSET_INFO *cs __attribute__((unused)), - my_wc_t * pwc, const uchar *s) +static int my_utf8mb3_uni_no_range(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t * pwc, const uchar *s) { uchar c; @@ -4865,8 +4865,8 @@ static int my_utf8_uni_no_range(CHARSET_INFO *cs __attribute__((unused)), } -static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)), - my_wc_t wc, uchar *r, uchar *e) +static int my_uni_utf8mb3(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *r, uchar *e) { if (wc < 0x80) { @@ -4901,8 +4901,8 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)), /* The same as above, but without range check. */ -static int my_uni_utf8_no_range(CHARSET_INFO *cs __attribute__((unused)), - my_wc_t wc, uchar *r) +static int my_uni_utf8mb3_no_range(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *r) { int count; @@ -4945,8 +4945,9 @@ my_toupper_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) } -static size_t my_caseup_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, - char *dst, size_t dstlen) +static size_t my_caseup_utf8mb3(CHARSET_INFO *cs, + const char *src, size_t srclen, + char *dst, size_t dstlen) { my_wc_t wc; int srcres, dstres; @@ -4956,10 +4957,10 @@ static size_t my_caseup_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, DBUG_ASSERT(src != dst || cs->caseup_multiply == 1); while ((src < srcend) && - (srcres= my_utf8_uni(cs, &wc, (uchar *) src, (uchar*) srcend)) > 0) + (srcres= my_utf8mb3_uni(cs, &wc, (uchar *) src, (uchar*) srcend)) > 0) { my_toupper_utf8mb3(uni_plane, &wc); - if ((dstres= my_uni_utf8(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) + if ((dstres= my_uni_utf8mb3(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) break; src+= srcres; dst+= dstres; @@ -4968,8 +4969,8 @@ static size_t my_caseup_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, } -static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen, - ulong *nr1, ulong *nr2) +static void my_hash_sort_utf8mb3_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *nr1, ulong *nr2) { my_wc_t wc; int res; @@ -4977,7 +4978,7 @@ static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t sle MY_UNICASE_INFO *uni_plane= cs->caseinfo; register ulong m1= *nr1, m2= *nr2; - while ((s < e) && (res=my_utf8_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 ) + while ((s < e) && (res=my_utf8mb3_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 ) { my_tosort_unicode(uni_plane, &wc, cs->state); MY_HASH_ADD_16(m1, m2, wc); @@ -4988,8 +4989,8 @@ static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t sle } -static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen, - ulong *nr1, ulong *nr2) +static void my_hash_sort_utf8mb3(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *nr1, ulong *nr2) { const uchar *e= s+slen; /* @@ -4998,11 +4999,11 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen, */ while (e > s && e[-1] == ' ') e--; - my_hash_sort_utf8_nopad(cs, s, e - s, nr1, nr2); + my_hash_sort_utf8mb3_nopad(cs, s, e - s, nr1, nr2); } -static size_t my_caseup_str_utf8(CHARSET_INFO *cs, char *src) +static size_t my_caseup_str_utf8mb3(CHARSET_INFO *cs, char *src) { my_wc_t wc; int srcres, dstres; @@ -5011,10 +5012,10 @@ static size_t my_caseup_str_utf8(CHARSET_INFO *cs, char *src) DBUG_ASSERT(cs->caseup_multiply == 1); while (*src && - (srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0) + (srcres= my_utf8mb3_uni_no_range(cs, &wc, (uchar *) src)) > 0) { my_toupper_utf8mb3(uni_plane, &wc); - if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0) + if ((dstres= my_uni_utf8mb3_no_range(cs, wc, (uchar*) dst)) <= 0) break; src+= srcres; dst+= dstres; @@ -5024,8 +5025,9 @@ static size_t my_caseup_str_utf8(CHARSET_INFO *cs, char *src) } -static size_t my_casedn_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, - char *dst, size_t dstlen) +static size_t my_casedn_utf8mb3(CHARSET_INFO *cs, + const char *src, size_t srclen, + char *dst, size_t dstlen) { my_wc_t wc; int srcres, dstres; @@ -5035,10 +5037,10 @@ static size_t my_casedn_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, DBUG_ASSERT(src != dst || cs->casedn_multiply == 1); while ((src < srcend) && - (srcres= my_utf8_uni(cs, &wc, (uchar*) src, (uchar*)srcend)) > 0) + (srcres= my_utf8mb3_uni(cs, &wc, (uchar*) src, (uchar*)srcend)) > 0) { my_tolower_utf8mb3(uni_plane, &wc); - if ((dstres= my_uni_utf8(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) + if ((dstres= my_uni_utf8mb3(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) break; src+= srcres; dst+= dstres; @@ -5047,7 +5049,7 @@ static size_t my_casedn_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, } -static size_t my_casedn_str_utf8(CHARSET_INFO *cs, char *src) +static size_t my_casedn_str_utf8mb3(CHARSET_INFO *cs, char *src) { my_wc_t wc; int srcres, dstres; @@ -5056,10 +5058,10 @@ static size_t my_casedn_str_utf8(CHARSET_INFO *cs, char *src) DBUG_ASSERT(cs->casedn_multiply == 1); while (*src && - (srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0) + (srcres= my_utf8mb3_uni_no_range(cs, &wc, (uchar *) src)) > 0) { my_tolower_utf8mb3(uni_plane, &wc); - if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0) + if ((dstres= my_uni_utf8mb3_no_range(cs, wc, (uchar*) dst)) <= 0) break; src+= srcres; dst+= dstres; @@ -5070,12 +5072,12 @@ static size_t my_casedn_str_utf8(CHARSET_INFO *cs, char *src) the original string, for example: "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE" - (which is 0xC4B0 in utf8, i.e. two bytes) + (which is 0xC4B0 in utf8mb3, i.e. two bytes) is converted into "U+0069 LATIN SMALL LETTER I" - (which is 0x69 in utf8, i.e. one byte) + (which is 0x69 in utf8mb3, i.e. one byte) So, we need to put '\0' terminator after converting. */ @@ -5089,7 +5091,7 @@ static size_t my_casedn_str_utf8(CHARSET_INFO *cs, char *src) Compare 0-terminated UTF8 strings. SYNOPSIS - my_strcasecmp_utf8() + my_strcasecmp_utf8mb3() cs character set handler s First 0-terminated string to compare t Second 0-terminated string to compare @@ -5103,7 +5105,7 @@ static size_t my_casedn_str_utf8(CHARSET_INFO *cs, char *src) */ static -int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) +int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t) { MY_UNICASE_INFO *uni_plane= cs->caseinfo; while (s[0] && t[0]) @@ -5127,19 +5129,19 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) /* Scan a multibyte character. - In the future it is worth to write a special version of my_utf8_uni() + In the future it is worth to write a special version of my_utf8mb3_uni() for 0-terminated strings which will not take in account length. Now - we call the regular version of my_utf8_uni() with s+3 in the + we call the regular version of my_utf8mb3_uni() with s+3 in the last argument. s+3 is enough to scan any multibyte sequence. - Calling the regular version of my_utf8_uni is safe for 0-terminated + Calling the regular version of my_utf8mb3_uni is safe for 0-terminated strings: we will never lose the end of the string: If we have 0 character in the middle of a multibyte sequence, - then my_utf8_uni will always return a negative number, so the + then my_utf8mb3_uni will always return a negative number, so the loop with finish. */ - res= my_utf8_uni(cs,&s_wc, (const uchar*)s, (const uchar*) s + 3); + res= my_utf8mb3_uni(cs,&s_wc, (const uchar*)s, (const uchar*) s + 3); /* In the case of wrong multibyte sequence we will @@ -5164,7 +5166,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) } else { - int res=my_utf8_uni(cs,&t_wc, (const uchar*)t, (const uchar*) t + 3); + int res=my_utf8mb3_uni(cs,&t_wc, (const uchar*)t, (const uchar*) t + 3); if (res <= 0) return strcmp(s, t); t+= res; @@ -5182,10 +5184,10 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) static -int my_wildcmp_utf8(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) +int my_wildcmp_utf8mb3(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) { MY_UNICASE_INFO *uni_plane= cs->caseinfo; return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, @@ -5194,8 +5196,8 @@ int my_wildcmp_utf8(CHARSET_INFO *cs, static -int my_charlen_utf8(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s, const uchar *e) +int my_charlen_utf8mb3(CHARSET_INFO *cs __attribute__((unused)), + const uchar *s, const uchar *e) { uchar c; @@ -5210,23 +5212,23 @@ int my_charlen_utf8(CHARSET_INFO *cs __attribute__((unused)), } -#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8 -#define CHARLEN(cs,str,end) my_charlen_utf8(cs,str,end) +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3 +#define CHARLEN(cs,str,end) my_charlen_utf8mb3(cs,str,end) #define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN #include "ctype-mb.ic" #undef MY_FUNCTION_NAME #undef CHARLEN #undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN -/* my_well_formed_char_length_utf8 */ +/* my_well_formed_char_length_utf8mb3 */ -static inline int my_weight_mb1_utf8_general_ci(uchar b) +static inline int my_weight_mb1_utf8mb3_general_ci(uchar b) { return (int) my_unicase_default_page00[b & 0xFF].sort; } -static inline int my_weight_mb2_utf8_general_ci(uchar b0, uchar b1) +static inline int my_weight_mb2_utf8mb3_general_ci(uchar b0, uchar b1) { my_wc_t wc= UTF8MB2_CODE(b0, b1); MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8]; @@ -5234,7 +5236,7 @@ static inline int my_weight_mb2_utf8_general_ci(uchar b0, uchar b1) } -static inline int my_weight_mb3_utf8_general_ci(uchar b0, uchar b1, uchar b2) +static inline int my_weight_mb3_utf8mb3_general_ci(uchar b0, uchar b1, uchar b2) { my_wc_t wc= UTF8MB3_CODE(b0, b1, b2); MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8]; @@ -5242,7 +5244,7 @@ static inline int my_weight_mb3_utf8_general_ci(uchar b0, uchar b1, uchar b2) } -#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_ci +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_general_ci #define DEFINE_STRNXFRM_UNICODE #define DEFINE_STRNXFRM_UNICODE_NOPAD #define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e) @@ -5251,28 +5253,28 @@ static inline int my_weight_mb3_utf8_general_ci(uchar b0, uchar b1, uchar b2) #define UNICASE_PAGE0 my_unicase_default_page00 #define UNICASE_PAGES my_unicase_default_pages #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) -#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_ci(x) -#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_ci(x,y) -#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_ci(x,y,z) +#define WEIGHT_MB1(x) my_weight_mb1_utf8mb3_general_ci(x) +#define WEIGHT_MB2(x,y) my_weight_mb2_utf8mb3_general_ci(x,y) +#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8mb3_general_ci(x,y,z) #include "strcoll.ic" #define DEFINE_STRNNCOLLSP_NOPAD -#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_nopad_ci +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_general_nopad_ci #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) -#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_ci(x) -#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_ci(x,y) -#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_ci(x,y,z) +#define WEIGHT_MB1(x) my_weight_mb1_utf8mb3_general_ci(x) +#define WEIGHT_MB2(x,y) my_weight_mb2_utf8mb3_general_ci(x,y) +#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8mb3_general_ci(x,y,z) #include "strcoll.ic" -static inline int my_weight_mb1_utf8_general_mysql500_ci(uchar b) +static inline int my_weight_mb1_utf8mb3_general_mysql500_ci(uchar b) { return (int) plane00_mysql500[b & 0xFF].sort; } -static inline int my_weight_mb2_utf8_general_mysql500_ci(uchar b0, uchar b1) +static inline int my_weight_mb2_utf8mb3_general_mysql500_ci(uchar b0, uchar b1) { my_wc_t wc= UTF8MB2_CODE(b0, b1); MY_UNICASE_CHARACTER *page= my_unicase_pages_mysql500[wc >> 8]; @@ -5281,7 +5283,7 @@ static inline int my_weight_mb2_utf8_general_mysql500_ci(uchar b0, uchar b1) static inline int -my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2) +my_weight_mb3_utf8mb3_general_mysql500_ci(uchar b0, uchar b1, uchar b2) { my_wc_t wc= UTF8MB3_CODE(b0, b1, b2); MY_UNICASE_CHARACTER *page= my_unicase_pages_mysql500[wc >> 8]; @@ -5289,7 +5291,7 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2) } -#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_mysql500_ci +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_general_mysql500_ci #define DEFINE_STRNXFRM_UNICODE #define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e) #define OPTIMIZE_ASCII 1 @@ -5297,13 +5299,13 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2) #define UNICASE_PAGE0 plane00_mysql500 #define UNICASE_PAGES my_unicase_pages_mysql500 #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) -#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_mysql500_ci(x) -#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_mysql500_ci(x,y) -#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_mysql500_ci(x,y,z) +#define WEIGHT_MB1(x) my_weight_mb1_utf8mb3_general_mysql500_ci(x) +#define WEIGHT_MB2(x,y) my_weight_mb2_utf8mb3_general_mysql500_ci(x,y) +#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8mb3_general_mysql500_ci(x,y,z) #include "strcoll.ic" -#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_bin +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_bin #define DEFINE_STRNXFRM_UNICODE_BIN2 #define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e) #define OPTIMIZE_ASCII 1 @@ -5315,7 +5317,7 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2) #define DEFINE_STRNNCOLLSP_NOPAD -#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_nopad_bin +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_nopad_bin #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB1(x) ((int) (uchar) (x)) #define WEIGHT_MB2(x,y) ((int) UTF8MB2_CODE(x,y)) @@ -5326,7 +5328,7 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2) TODO-10.2: join this with pad_max_char() in ctype-mb.c */ static void -my_fill_utf8_mb(CHARSET_INFO *cs, char *str, size_t length, int fill) +my_fill_utf8mb3_mb(CHARSET_INFO *cs, char *str, size_t length, int fill) { char *end= str + length; char buf[10]; @@ -5345,53 +5347,53 @@ my_fill_utf8_mb(CHARSET_INFO *cs, char *str, size_t length, int fill) static void -my_fill_utf8(CHARSET_INFO *cs, char *str, size_t length, int fill) +my_fill_utf8mb3(CHARSET_INFO *cs, char *str, size_t length, int fill) { if (fill < 0x80) my_fill_8bit(cs, str, length, fill); else - my_fill_utf8_mb(cs, str, length, fill); + my_fill_utf8mb3_mb(cs, str, length, fill); } -static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler = +static MY_COLLATION_HANDLER my_collation_utf8mb3_general_ci_handler = { NULL, /* init */ - my_strnncoll_utf8_general_ci, - my_strnncollsp_utf8_general_ci, - my_strnxfrm_utf8_general_ci, + my_strnncoll_utf8mb3_general_ci, + my_strnncollsp_utf8mb3_general_ci, + my_strnxfrm_utf8mb3_general_ci, my_strnxfrmlen_unicode, my_like_range_mb, - my_wildcmp_utf8, - my_strcasecmp_utf8, + my_wildcmp_utf8mb3, + my_strcasecmp_utf8mb3, my_instr_mb, - my_hash_sort_utf8, + my_hash_sort_utf8mb3, my_propagate_complex }; -static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler = +static MY_COLLATION_HANDLER my_collation_utf8mb3_general_mysql500_ci_handler = { NULL, /* init */ - my_strnncoll_utf8_general_mysql500_ci, - my_strnncollsp_utf8_general_mysql500_ci, - my_strnxfrm_utf8_general_mysql500_ci, + my_strnncoll_utf8mb3_general_mysql500_ci, + my_strnncollsp_utf8mb3_general_mysql500_ci, + my_strnxfrm_utf8mb3_general_mysql500_ci, my_strnxfrmlen_unicode, my_like_range_mb, - my_wildcmp_utf8, - my_strcasecmp_utf8, + my_wildcmp_utf8mb3, + my_strcasecmp_utf8mb3, my_instr_mb, - my_hash_sort_utf8, + my_hash_sort_utf8mb3, my_propagate_complex }; -static MY_COLLATION_HANDLER my_collation_utf8_bin_handler = +static MY_COLLATION_HANDLER my_collation_utf8mb3_bin_handler = { NULL, /* init */ - my_strnncoll_utf8_bin, - my_strnncollsp_utf8_bin, - my_strnxfrm_utf8_bin, + my_strnncoll_utf8mb3_bin, + my_strnncollsp_utf8mb3_bin, + my_strnxfrm_utf8mb3_bin, my_strnxfrmlen_unicode, my_like_range_mb, my_wildcmp_mb_bin, @@ -5402,28 +5404,28 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler = }; -static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler = +static MY_COLLATION_HANDLER my_collation_utf8mb3_general_nopad_ci_handler = { NULL, /* init */ - my_strnncoll_utf8_general_ci, - my_strnncollsp_utf8_general_nopad_ci, - my_strnxfrm_nopad_utf8_general_ci, + my_strnncoll_utf8mb3_general_ci, + my_strnncollsp_utf8mb3_general_nopad_ci, + my_strnxfrm_nopad_utf8mb3_general_ci, my_strnxfrmlen_unicode, my_like_range_mb, - my_wildcmp_utf8, - my_strcasecmp_utf8, + my_wildcmp_utf8mb3, + my_strcasecmp_utf8mb3, my_instr_mb, - my_hash_sort_utf8_nopad, + my_hash_sort_utf8mb3_nopad, my_propagate_complex }; -static MY_COLLATION_HANDLER my_collation_utf8_nopad_bin_handler = +static MY_COLLATION_HANDLER my_collation_utf8mb3_nopad_bin_handler = { NULL, /* init */ - my_strnncoll_utf8_bin, - my_strnncollsp_utf8_nopad_bin, - my_strnxfrm_nopad_utf8_bin, + my_strnncoll_utf8mb3_bin, + my_strnncollsp_utf8mb3_nopad_bin, + my_strnxfrm_nopad_utf8mb3_bin, my_strnxfrmlen_unicode, my_like_range_mb, my_wildcmp_mb_bin, @@ -5434,24 +5436,24 @@ static MY_COLLATION_HANDLER my_collation_utf8_nopad_bin_handler = }; -MY_CHARSET_HANDLER my_charset_utf8_handler= +MY_CHARSET_HANDLER my_charset_utf8mb3_handler= { NULL, /* init */ my_numchars_mb, my_charpos_mb, my_lengthsp_8bit, my_numcells_mb, - my_utf8_uni, - my_uni_utf8, + my_utf8mb3_uni, + my_uni_utf8mb3, my_mb_ctype_mb, - my_caseup_str_utf8, - my_casedn_str_utf8, - my_caseup_utf8, - my_casedn_utf8, + my_caseup_str_utf8mb3, + my_casedn_str_utf8mb3, + my_caseup_utf8mb3, + my_casedn_utf8mb3, my_snprintf_8bit, my_long10_to_str_8bit, my_longlong10_to_str_8bit, - my_fill_utf8, + my_fill_utf8mb3, my_strntol_8bit, my_strntoul_8bit, my_strntoll_8bit, @@ -5460,26 +5462,26 @@ MY_CHARSET_HANDLER my_charset_utf8_handler= my_strtoll10_8bit, my_strntoull10rnd_8bit, my_scan_8bit, - my_charlen_utf8, - my_well_formed_char_length_utf8, + my_charlen_utf8mb3, + my_well_formed_char_length_utf8mb3, my_copy_fix_mb, - my_uni_utf8, + my_uni_utf8mb3, }; -struct charset_info_st my_charset_utf8_general_ci= +struct charset_info_st my_charset_utf8mb3_general_ci= { 33,0,0, /* number */ MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE, /* state */ - "utf8", /* cs name */ - "utf8_general_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_general_ci", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ - to_upper_utf8, /* sort_order */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ + to_upper_utf8mb3, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -5496,23 +5498,23 @@ struct charset_info_st my_charset_utf8_general_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, - &my_collation_utf8_general_ci_handler + &my_charset_utf8mb3_handler, + &my_collation_utf8mb3_general_ci_handler }; -struct charset_info_st my_charset_utf8_general_mysql500_ci= +struct charset_info_st my_charset_utf8mb3_general_mysql500_ci= { 223,0,0, /* number */ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, /* state */ - "utf8", /* cs name */ - "utf8_general_mysql500_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_general_mysql500_ci", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ - to_upper_utf8, /* sort_order */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ + to_upper_utf8mb3, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -5529,22 +5531,22 @@ struct charset_info_st my_charset_utf8_general_mysql500_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, - &my_collation_utf8_general_mysql500_ci_handler + &my_charset_utf8mb3_handler, + &my_collation_utf8mb3_general_mysql500_ci_handler }; -struct charset_info_st my_charset_utf8_bin= +struct charset_info_st my_charset_utf8mb3_bin= { 83,0,0, /* number */ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_BINSORT|MY_CS_UNICODE, /* state */ - "utf8", /* cs name */ - "utf8_bin", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_bin", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ NULL, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ @@ -5562,23 +5564,23 @@ struct charset_info_st my_charset_utf8_bin= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, - &my_collation_utf8_bin_handler + &my_charset_utf8mb3_handler, + &my_collation_utf8mb3_bin_handler }; -struct charset_info_st my_charset_utf8_general_nopad_ci= +struct charset_info_st my_charset_utf8mb3_general_nopad_ci= { MY_NOPAD_ID(33),0,0, /* number */ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NOPAD, /* state */ - "utf8", /* cs name */ - "utf8_general_nopad_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_general_nopad_ci", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ - to_upper_utf8, /* sort_order */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ + to_upper_utf8mb3, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -5595,22 +5597,22 @@ struct charset_info_st my_charset_utf8_general_nopad_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, - &my_collation_utf8_general_nopad_ci_handler + &my_charset_utf8mb3_handler, + &my_collation_utf8mb3_general_nopad_ci_handler }; -struct charset_info_st my_charset_utf8_nopad_bin= +struct charset_info_st my_charset_utf8mb3_nopad_bin= { MY_NOPAD_ID(83),0,0,/* number */ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NOPAD, - "utf8", /* cs name */ - "utf8_nopad_bin", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_nopad_bin", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ NULL, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ @@ -5628,8 +5630,8 @@ struct charset_info_st my_charset_utf8_nopad_bin= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, - &my_collation_utf8_nopad_bin_handler + &my_charset_utf8mb3_handler, + &my_collation_utf8mb3_nopad_bin_handler }; @@ -5642,7 +5644,7 @@ struct charset_info_st my_charset_utf8_nopad_bin= * variable to what they actually do. */ -static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, +static int my_strnncoll_utf8mb3_cs(CHARSET_INFO *cs, const uchar *s, size_t slen, const uchar *t, size_t tlen, my_bool t_is_prefix) @@ -5657,8 +5659,8 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, while ( s < se && t < te ) { - s_res=my_utf8_uni(cs,&s_wc, s, se); - t_res=my_utf8_uni(cs,&t_wc, t, te); + s_res=my_utf8mb3_uni(cs,&s_wc, s, se); + t_res=my_utf8mb3_uni(cs,&t_wc, t, te); if ( s_res <= 0 || t_res <= 0 ) @@ -5687,7 +5689,7 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, return t_is_prefix ? t-te : ((diff == 0) ? save_diff : diff); } -static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, +static int my_strnncollsp_utf8mb3_cs(CHARSET_INFO *cs, const uchar *s, size_t slen, const uchar *t, size_t tlen) { @@ -5700,8 +5702,8 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, while ( s < se && t < te ) { - s_res=my_utf8_uni(cs,&s_wc, s, se); - t_res=my_utf8_uni(cs,&t_wc, t, te); + s_res=my_utf8mb3_uni(cs,&s_wc, s, se); + t_res=my_utf8mb3_uni(cs,&t_wc, t, te); if ( s_res <= 0 || t_res <= 0 ) { @@ -5750,30 +5752,30 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, static MY_COLLATION_HANDLER my_collation_cs_handler = { NULL, /* init */ - my_strnncoll_utf8_cs, - my_strnncollsp_utf8_cs, - my_strnxfrm_utf8_general_ci, + my_strnncoll_utf8mb3_cs, + my_strnncollsp_utf8mb3_cs, + my_strnxfrm_utf8mb3_general_ci, my_strnxfrmlen_unicode, my_like_range_simple, my_wildcmp_mb, - my_strcasecmp_utf8, + my_strcasecmp_utf8mb3, my_instr_mb, - my_hash_sort_utf8, + my_hash_sort_utf8mb3, my_propagate_simple }; -struct charset_info_st my_charset_utf8_general_cs= +struct charset_info_st my_charset_utf8mb3_general_cs= { 254,0,0, /* number */ - MY_CS_COMPILED|MY_CS_UNICODE, /* state */ - "utf8", /* cs name */ - "utf8_general_cs", /* name */ + MY_CS_COMPILED|MY_CS_UNICODE, /* state */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_general_cs", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ - to_upper_utf8, /* sort_order */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ + to_upper_utf8mb3, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -5790,7 +5792,7 @@ struct charset_info_st my_charset_utf8_general_cs= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_collation_cs_handler }; #endif /* Cybozu Hack */ @@ -7048,9 +7050,9 @@ my_charlen_filename(CHARSET_INFO *cs, const uchar *str, const uchar *end) /* #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) -#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_ci(x) -#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_ci(x,y) -#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_ci(x,y,z) +#define WEIGHT_MB1(x) my_weight_mb1_utf8mb3_general_ci(x) +#define WEIGHT_MB2(x,y) my_weight_mb2_utf8mb3_general_ci(x,y) +#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8mb3_general_ci(x,y,z) */ #include "strcoll.ic" @@ -7063,10 +7065,10 @@ static MY_COLLATION_HANDLER my_collation_filename_handler = my_strnxfrm_filename, my_strnxfrmlen_unicode, my_like_range_mb, - my_wildcmp_utf8, - my_strcasecmp_utf8, + my_wildcmp_utf8mb3, + my_strcasecmp_utf8mb3, my_instr_mb, - my_hash_sort_utf8, + my_hash_sort_utf8mb3, my_propagate_complex }; @@ -7080,10 +7082,10 @@ static MY_CHARSET_HANDLER my_charset_filename_handler= my_mb_wc_filename, my_wc_mb_filename, my_mb_ctype_mb, - my_caseup_str_utf8, - my_casedn_str_utf8, - my_caseup_utf8, - my_casedn_utf8, + my_caseup_str_utf8mb3, + my_casedn_str_utf8mb3, + my_caseup_utf8mb3, + my_casedn_utf8mb3, my_snprintf_8bit, my_long10_to_str_8bit, my_longlong10_to_str_8bit, @@ -7112,10 +7114,10 @@ struct charset_info_st my_charset_filename= "filename", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ - to_upper_utf8, /* sort_order */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ + to_upper_utf8mb3, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -7506,12 +7508,12 @@ my_casedn_str_utf8mb4(CHARSET_INFO *cs, char *src) the original string, for example: "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE" - (which is 0xC4B0 in utf8, i.e. two bytes) + (which is 0xC4B0 in utf8mb3, i.e. two bytes) is converted into "U+0069 LATIN SMALL LETTER I" - (which is 0x69 in utf8, i.e. one byte) + (which is 0x69 in utf8mb3, i.e. one byte) So, we need to put '\0' terminator after converting. */ @@ -7653,9 +7655,9 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), #define UNICASE_PAGES my_unicase_default_pages #define IS_MB4_CHAR(b0,b1,b2,b3) IS_UTF8MB4_STEP3(b0,b1,b2,b3) #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) -#define WEIGHT_MB1(b0) my_weight_mb1_utf8_general_ci(b0) -#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf8_general_ci(b0,b1) -#define WEIGHT_MB3(b0,b1,b2) my_weight_mb3_utf8_general_ci(b0,b1,b2) +#define WEIGHT_MB1(b0) my_weight_mb1_utf8mb3_general_ci(b0) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf8mb3_general_ci(b0,b1) +#define WEIGHT_MB3(b0,b1,b2) my_weight_mb3_utf8mb3_general_ci(b0,b1,b2) /* All non-BMP characters have the same weight. */ @@ -7676,9 +7678,9 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), #define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4_general_nopad_ci #define IS_MB4_CHAR(b0,b1,b2,b3) IS_UTF8MB4_STEP3(b0,b1,b2,b3) #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) -#define WEIGHT_MB1(b0) my_weight_mb1_utf8_general_ci(b0) -#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf8_general_ci(b0,b1) -#define WEIGHT_MB3(b0,b1,b2) my_weight_mb3_utf8_general_ci(b0,b1,b2) +#define WEIGHT_MB1(b0) my_weight_mb1_utf8mb3_general_ci(b0) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf8mb3_general_ci(b0,b1) +#define WEIGHT_MB3(b0,b1,b2) my_weight_mb3_utf8mb3_general_ci(b0,b1,b2) /* All non-BMP characters have the same weight. */ @@ -7777,7 +7779,7 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler= my_snprintf_8bit, my_long10_to_str_8bit, my_longlong10_to_str_8bit, - my_fill_utf8, + my_fill_utf8mb3, my_strntol_8bit, my_strntoul_8bit, my_strntoll_8bit, diff --git a/strings/ctype.c b/strings/ctype.c index 32c41e6e9e7..9a89c6fe41d 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -413,7 +413,7 @@ tailoring_append2(MY_XML_PARSER *st, static size_t scan_one_character(const char *s, const char *e, my_wc_t *wc) { - CHARSET_INFO *cs= &my_charset_utf8_general_ci; + CHARSET_INFO *cs= &my_charset_utf8mb3_general_ci; if (s >= e) return 0; diff --git a/strings/strcoll.ic b/strings/strcoll.ic index 50278135dd4..e7d614ebdf5 100644 --- a/strings/strcoll.ic +++ b/strings/strcoll.ic @@ -105,7 +105,7 @@ MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end) #ifdef IS_MB1_MBHEAD_UNUSED_GAP /* Quickly filter out unused bytes that are neither MB1 nor MBHEAD. - E.g. [0x80..0xC1] in utf8. This allows using simplified conditions + E.g. [0x80..0xC1] in utf8mb(3|4). This allows using simplified conditions in IS_MB2_CHAR(), IS_MB3_CHAR(), etc. */ if (IS_MB1_MBHEAD_UNUSED_GAP(*str)) @@ -158,7 +158,7 @@ bad: Note, cs->coll->strnncoll() is usually used to compare identifiers. Perhaps we should eventually (in 10.2?) create a new collation - my_charset_utf8_general_ci_no_pad and have only one comparison function + my_charset_utf8mb3_general_ci_no_pad and have only one comparison function in MY_COLLATION_HANDLER. @param cs - the character set and collation @@ -339,7 +339,7 @@ MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs, Store sorting weights using 2 bytes per character. This function is shared between - - utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin + - utf8mb3_general_ci, utf8mb3_bin, ucs2_general_ci, ucs2_bin which support BMP only (U+0000..U+FFFF). - utf8mb4_general_ci, utf16_general_ci, utf32_general_ci, which map all supplementary characters to weight 0xFFFD. @@ -473,7 +473,7 @@ MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs, Store sorting weights using 2 bytes per character. These functions are shared between - - utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin + - utf8mb3_general_ci, utf8mb3_bin, ucs2_general_ci, ucs2_bin which support BMP only (U+0000..U+FFFF). - utf8mb4_general_ci, utf16_general_ci, utf32_general_ci, which map all supplementary characters to weight 0xFFFD. |