diff options
author | Alexander Barkov <bar@mariadb.com> | 2019-06-28 09:05:12 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2019-06-28 12:37:04 +0400 |
commit | 3e7e87ddccf71ba5328c257b7d642d6803fbca3a (patch) | |
tree | 0ea22d4c533b84b2675c3d7c7555f5ad367222ac | |
parent | 323a87b591d3e9bcedb7dea09b69d9eeb1c42880 (diff) | |
download | mariadb-git-3e7e87ddccf71ba5328c257b7d642d6803fbca3a.tar.gz |
MDEV-19897 Rename source code variable names from utf8 to utf8mb3
56 files changed, 727 insertions, 725 deletions
diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc index 93e23b106c2..f2f57f6e0bb 100644 --- a/client/mysqlbinlog.cc +++ b/client/mysqlbinlog.cc @@ -67,7 +67,7 @@ Rpl_filter *binlog_filter= 0; #define PROBE_HEADER_LEN (EVENT_LEN_OFFSET+4) /* Needed for Rpl_filter */ -CHARSET_INFO* system_charset_info= &my_charset_utf8_general_ci; +CHARSET_INFO* system_charset_info= &my_charset_utf8mb3_general_ci; /* Needed for Flashback */ DYNAMIC_ARRAY binlog_events; // Storing the events output string diff --git a/cmake/character_sets.cmake b/cmake/character_sets.cmake index 37de79758b3..4d59df825e8 100644 --- a/cmake/character_sets.cmake +++ b/cmake/character_sets.cmake @@ -32,7 +32,7 @@ binary armscii8 ascii big5 cp1250 cp1251 cp1256 cp1257 cp850 cp852 cp866 cp932 dec8 eucjpms euckr gb2312 gbk geostd8 greek hebrew hp8 keybcs2 koi8r koi8u latin1 latin2 latin5 latin7 macce macroman -sjis swe7 tis620 ucs2 ujis utf8 utf8mb4 utf16 utf16le utf32) +sjis swe7 tis620 ucs2 ujis utf8mb3 utf8mb4 utf16 utf16le utf32) SET (EXTRA_CHARSETS "all") diff --git a/config.h.cmake b/config.h.cmake index 8fbbaa3dafa..e2fd67a128d 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -505,7 +505,6 @@ #cmakedefine HAVE_CHARSET_ujis 1 #cmakedefine HAVE_CHARSET_utf8mb4 1 #cmakedefine HAVE_CHARSET_utf8mb3 1 -#cmakedefine HAVE_CHARSET_utf8 1 #cmakedefine HAVE_CHARSET_utf16 1 #cmakedefine HAVE_CHARSET_utf32 1 #cmakedefine HAVE_UCA_COLLATIONS 1 diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 84a873d0729..31223ec71f8 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -5893,7 +5893,7 @@ handle_options(int argc, char **argv, char ***argv_client, char ***argv_server) srv_operation = SRV_OPERATION_RESTORE; - files_charset_info = &my_charset_utf8_general_ci; + files_charset_info = &my_charset_utf8mb3_general_ci; setup_error_messages(); @@ -6117,7 +6117,7 @@ int main(int argc, char **argv) die("mysql_server_init() failed"); } - system_charset_info = &my_charset_utf8_general_ci; + system_charset_info = &my_charset_utf8mb3_general_ci; key_map_full.set_all(); logger.init_base(); diff --git a/include/m_ctype.h b/include/m_ctype.h index 0f6e6a11666..54e1a166592 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -531,7 +531,7 @@ struct my_charset_handler_st extern MY_CHARSET_HANDLER my_charset_8bit_handler; extern MY_CHARSET_HANDLER my_charset_ucs2_handler; -extern MY_CHARSET_HANDLER my_charset_utf8_handler; +extern MY_CHARSET_HANDLER my_charset_utf8mb3_handler; /* @@ -582,7 +582,7 @@ extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_bin; extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_latin1; extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_latin1_nopad; extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_filename; -extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_utf8_general_ci; +extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_utf8mb3_general_ci; extern struct charset_info_st my_charset_big5_bin; extern struct charset_info_st my_charset_big5_chinese_ci; @@ -649,12 +649,12 @@ extern struct charset_info_st my_charset_utf32_unicode_ci; extern struct charset_info_st my_charset_utf32_unicode_nopad_ci; extern struct charset_info_st my_charset_utf32_nopad_bin; extern struct charset_info_st my_charset_utf32_general_nopad_ci; -extern struct charset_info_st my_charset_utf8_bin; -extern struct charset_info_st my_charset_utf8_nopad_bin; -extern struct charset_info_st my_charset_utf8_general_nopad_ci; -extern struct charset_info_st my_charset_utf8_general_mysql500_ci; -extern struct charset_info_st my_charset_utf8_unicode_ci; -extern struct charset_info_st my_charset_utf8_unicode_nopad_ci; +extern struct charset_info_st my_charset_utf8mb3_bin; +extern struct charset_info_st my_charset_utf8mb3_nopad_bin; +extern struct charset_info_st my_charset_utf8mb3_general_nopad_ci; +extern struct charset_info_st my_charset_utf8mb3_general_mysql500_ci; +extern struct charset_info_st my_charset_utf8mb3_unicode_ci; +extern struct charset_info_st my_charset_utf8mb3_unicode_nopad_ci; extern struct charset_info_st my_charset_utf8mb4_bin; extern struct charset_info_st my_charset_utf8mb4_general_ci; extern struct charset_info_st my_charset_utf8mb4_nopad_bin; diff --git a/include/ma_dyncol.h b/include/ma_dyncol.h index 4f05b425afd..833a25b937e 100644 --- a/include/ma_dyncol.h +++ b/include/ma_dyncol.h @@ -66,7 +66,7 @@ typedef struct st_mysql_lex_string LEX_STRING; #ifdef HAVE_CHARSET_utf8mb4 #define DYNCOL_UTF (&my_charset_utf8mb4_general_ci) #else -#define DYNCOL_UTF (&my_charset_utf8_general_ci) +#define DYNCOL_UTF (&my_charset_utf8mb3_general_ci) #endif /* escape json strings */ diff --git a/include/my_global.h b/include/my_global.h index 248320e301f..c0774146166 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -1049,8 +1049,8 @@ typedef ulong myf; /* Type of MyFlags in my_funcs */ #ifdef HAVE_CHARSET_utf8mb4 #define MYSQL_UNIVERSAL_CLIENT_CHARSET "utf8mb4" -#elif defined(HAVE_CHARSET_utf8) -#define MYSQL_UNIVERSAL_CLIENT_CHARSET "utf8" +#elif defined(HAVE_CHARSET_utf8mb3) +#define MYSQL_UNIVERSAL_CLIENT_CHARSET "utf8mb3" #else #define MYSQL_UNIVERSAL_CLIENT_CHARSET MYSQL_DEFAULT_CHARSET_NAME #endif diff --git a/libmysqld/CMakeLists.txt b/libmysqld/CMakeLists.txt index f59163eb6a8..34126c50bc9 100644 --- a/libmysqld/CMakeLists.txt +++ b/libmysqld/CMakeLists.txt @@ -297,7 +297,7 @@ mysql_stmt_next_result # Charsets my_charset_bin my_charset_latin1 -my_charset_utf8_general_ci +my_charset_utf8mb3_general_ci # Client plugins mysql_client_find_plugin mysql_client_register_plugin diff --git a/libmysqld/lib_sql.cc b/libmysqld/lib_sql.cc index 80fd79ae56b..b0fb6054a97 100644 --- a/libmysqld/lib_sql.cc +++ b/libmysqld/lib_sql.cc @@ -559,7 +559,7 @@ int init_embedded_server(int argc, char **argv, char **groups) remaining_argv= *argvp; /* Must be initialized early for comparison of options name */ - system_charset_info= &my_charset_utf8_general_ci; + system_charset_info= &my_charset_utf8mb3_general_ci; sys_var_init(); int ho_error= handle_early_options(); diff --git a/mysys/charset-def.c b/mysys/charset-def.c index b4317806762..249fb1b5e4d 100644 --- a/mysys/charset-def.c +++ b/mysys/charset-def.c @@ -116,37 +116,37 @@ extern struct charset_info_st my_charset_utf16_unicode_520_nopad_ci; #endif /* HAVE_CHARSET_utf16 */ -#ifdef HAVE_CHARSET_utf8 -extern struct charset_info_st my_charset_utf8_german2_uca_ci; -extern struct charset_info_st my_charset_utf8_icelandic_uca_ci; -extern struct charset_info_st my_charset_utf8_latvian_uca_ci; -extern struct charset_info_st my_charset_utf8_romanian_uca_ci; -extern struct charset_info_st my_charset_utf8_slovenian_uca_ci; -extern struct charset_info_st my_charset_utf8_polish_uca_ci; -extern struct charset_info_st my_charset_utf8_estonian_uca_ci; -extern struct charset_info_st my_charset_utf8_spanish_uca_ci; -extern struct charset_info_st my_charset_utf8_swedish_uca_ci; -extern struct charset_info_st my_charset_utf8_turkish_uca_ci; -extern struct charset_info_st my_charset_utf8_czech_uca_ci; -extern struct charset_info_st my_charset_utf8_danish_uca_ci; -extern struct charset_info_st my_charset_utf8_lithuanian_uca_ci; -extern struct charset_info_st my_charset_utf8_slovak_uca_ci; -extern struct charset_info_st my_charset_utf8_spanish2_uca_ci; -extern struct charset_info_st my_charset_utf8_roman_uca_ci; -extern struct charset_info_st my_charset_utf8_persian_uca_ci; -extern struct charset_info_st my_charset_utf8_esperanto_uca_ci; -extern struct charset_info_st my_charset_utf8_hungarian_uca_ci; -extern struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci; -extern struct charset_info_st my_charset_utf8_sinhala_uca_ci; -extern struct charset_info_st my_charset_utf8_unicode_520_ci; -extern struct charset_info_st my_charset_utf8_vietnamese_ci; -extern struct charset_info_st my_charset_utf8_croatian_uca_ci; -extern struct charset_info_st my_charset_utf8_myanmar_uca_ci; -extern struct charset_info_st my_charset_utf8_thai_520_w2; +#ifdef HAVE_CHARSET_utf8mb3 +extern struct charset_info_st my_charset_utf8mb3_german2_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_icelandic_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_latvian_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_romanian_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_slovenian_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_polish_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_estonian_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_spanish_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_swedish_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_turkish_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_czech_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_danish_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_lithuanian_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_slovak_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_spanish2_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_roman_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_persian_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_esperanto_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_hungarian_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_croatian_mysql561_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_sinhala_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_unicode_520_ci; +extern struct charset_info_st my_charset_utf8mb3_vietnamese_ci; +extern struct charset_info_st my_charset_utf8mb3_croatian_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_myanmar_uca_ci; +extern struct charset_info_st my_charset_utf8mb3_thai_520_w2; #ifdef HAVE_UTF8_GENERAL_CS -extern struct charset_info_st my_charset_utf8_general_cs; +extern struct charset_info_st my_charset_utf8mb3_general_cs; #endif -extern struct charset_info_st my_charset_utf8_unicode_520_nopad_ci; +extern struct charset_info_st my_charset_utf8mb3_unicode_520_nopad_ci; #endif #ifdef HAVE_CHARSET_utf8mb4 @@ -304,47 +304,47 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) add_compiled_collation(&my_charset_ujis_nopad_bin); #endif -#ifdef HAVE_CHARSET_utf8 - add_compiled_collation(&my_charset_utf8_general_ci); - add_compiled_collation(&my_charset_utf8_general_nopad_ci); - add_compiled_collation(&my_charset_utf8_bin); - add_compiled_collation(&my_charset_utf8_nopad_bin); - add_compiled_collation(&my_charset_utf8_general_mysql500_ci); +#ifdef HAVE_CHARSET_utf8mb3 + add_compiled_collation(&my_charset_utf8mb3_general_ci); + add_compiled_collation(&my_charset_utf8mb3_general_nopad_ci); + add_compiled_collation(&my_charset_utf8mb3_bin); + add_compiled_collation(&my_charset_utf8mb3_nopad_bin); + add_compiled_collation(&my_charset_utf8mb3_general_mysql500_ci); #ifdef HAVE_UTF8_GENERAL_CS - add_compiled_collation(&my_charset_utf8_general_cs); + add_compiled_collation(&my_charset_utf8mb3_general_cs); #endif #ifdef HAVE_UCA_COLLATIONS - add_compiled_collation(&my_charset_utf8_unicode_ci); - add_compiled_collation(&my_charset_utf8_german2_uca_ci); - add_compiled_collation(&my_charset_utf8_icelandic_uca_ci); - add_compiled_collation(&my_charset_utf8_latvian_uca_ci); - add_compiled_collation(&my_charset_utf8_romanian_uca_ci); - add_compiled_collation(&my_charset_utf8_slovenian_uca_ci); - add_compiled_collation(&my_charset_utf8_polish_uca_ci); - add_compiled_collation(&my_charset_utf8_estonian_uca_ci); - add_compiled_collation(&my_charset_utf8_spanish_uca_ci); - add_compiled_collation(&my_charset_utf8_swedish_uca_ci); - add_compiled_collation(&my_charset_utf8_turkish_uca_ci); - add_compiled_collation(&my_charset_utf8_czech_uca_ci); - add_compiled_collation(&my_charset_utf8_danish_uca_ci); - add_compiled_collation(&my_charset_utf8_lithuanian_uca_ci); - add_compiled_collation(&my_charset_utf8_slovak_uca_ci); - add_compiled_collation(&my_charset_utf8_spanish2_uca_ci); - add_compiled_collation(&my_charset_utf8_roman_uca_ci); - add_compiled_collation(&my_charset_utf8_persian_uca_ci); - add_compiled_collation(&my_charset_utf8_esperanto_uca_ci); - add_compiled_collation(&my_charset_utf8_hungarian_uca_ci); - add_compiled_collation(&my_charset_utf8_croatian_mysql561_uca_ci); - add_compiled_collation(&my_charset_utf8_sinhala_uca_ci); - add_compiled_collation(&my_charset_utf8_unicode_520_ci); - add_compiled_collation(&my_charset_utf8_vietnamese_ci); - add_compiled_collation(&my_charset_utf8_croatian_uca_ci); - add_compiled_collation(&my_charset_utf8_myanmar_uca_ci); - add_compiled_collation(&my_charset_utf8_thai_520_w2); - add_compiled_collation(&my_charset_utf8_unicode_nopad_ci); - add_compiled_collation(&my_charset_utf8_unicode_520_nopad_ci); + add_compiled_collation(&my_charset_utf8mb3_unicode_ci); + add_compiled_collation(&my_charset_utf8mb3_german2_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_icelandic_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_latvian_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_romanian_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_slovenian_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_polish_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_estonian_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_spanish_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_swedish_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_turkish_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_czech_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_danish_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_lithuanian_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_slovak_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_spanish2_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_roman_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_persian_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_esperanto_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_hungarian_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_croatian_mysql561_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_sinhala_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_unicode_520_ci); + add_compiled_collation(&my_charset_utf8mb3_vietnamese_ci); + add_compiled_collation(&my_charset_utf8mb3_croatian_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_myanmar_uca_ci); + add_compiled_collation(&my_charset_utf8mb3_thai_520_w2); + add_compiled_collation(&my_charset_utf8mb3_unicode_nopad_ci); + add_compiled_collation(&my_charset_utf8mb3_unicode_520_nopad_ci); #endif -#endif /* HAVE_CHARSET_utf8 */ +#endif /* HAVE_CHARSET_utf8mb3 */ #ifdef HAVE_CHARSET_utf8mb4 diff --git a/mysys/charset.c b/mysys/charset.c index f44dc7606c1..7771f5800ef 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -262,7 +262,7 @@ static my_bool simple_cs_is_full(CHARSET_INFO *cs) } -#if defined(HAVE_UCA_COLLATIONS) && (defined(HAVE_CHARSET_ucs2) || defined(HAVE_CHARSET_utf8)) +#if defined(HAVE_UCA_COLLATIONS) && (defined(HAVE_CHARSET_ucs2) || defined(HAVE_CHARSET_utf8mb3)) /** Initialize a loaded collation. @param [OUT] to - The new charset_info_st structure to initialize. @@ -350,12 +350,12 @@ static int add_collation(struct charset_info_st *cs) } else if (!strcmp(cs->csname, "utf8") || !strcmp(cs->csname, "utf8mb3")) { -#if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS) +#if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS) copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? - &my_charset_utf8_unicode_nopad_ci : - &my_charset_utf8_unicode_ci, + &my_charset_utf8mb3_unicode_nopad_ci : + &my_charset_utf8mb3_unicode_ci, cs); - newcs->ctype= my_charset_utf8_unicode_ci.ctype; + newcs->ctype= my_charset_utf8mb3_unicode_ci.ctype; if (init_state_maps(newcs)) return MY_XML_ERROR; #endif diff --git a/mysys/my_error.c b/mysys/my_error.c index 5f1ca0af55b..cb1fbfe1c04 100644 --- a/mysys/my_error.c +++ b/mysys/my_error.c @@ -118,7 +118,7 @@ void my_error(uint nr, myf MyFlags, ...) else { va_start(args,MyFlags); - (void) my_vsnprintf_ex(&my_charset_utf8_general_ci, ebuff, + (void) my_vsnprintf_ex(&my_charset_utf8mb3_general_ci, ebuff, sizeof(ebuff), format, args); va_end(args); } @@ -148,7 +148,7 @@ void my_printf_error(uint error, const char *format, myf MyFlags, ...) error, MyFlags, errno, format)); va_start(args,MyFlags); - (void) my_vsnprintf_ex(&my_charset_utf8_general_ci, ebuff, + (void) my_vsnprintf_ex(&my_charset_utf8mb3_general_ci, ebuff, sizeof(ebuff), format, args); va_end(args); (*error_handler_hook)(error, ebuff, MyFlags); diff --git a/sql/handler.cc b/sql/handler.cc index 1f14a567ddd..93d52c970ae 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -4025,7 +4025,7 @@ int handler::check_collation_compatibility() cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */ cs_number == 26)) || /* cp1250_general_ci - bug #29461 */ (mysql_version < 50124 && - (cs_number == 33 || /* utf8_general_ci - bug #27877 */ + (cs_number == 33 || /* utf8mb3_general_ci - bug #27877 */ cs_number == 35))) /* ucs2_general_ci - bug #27877 */ return HA_ADMIN_NEEDS_UPGRADE; } diff --git a/sql/item.h b/sql/item.h index c556b851b03..2808286cc90 100644 --- a/sql/item.h +++ b/sql/item.h @@ -4508,11 +4508,11 @@ class Item_empty_string :public Item_partition_func_safe_string { public: Item_empty_string(THD *thd, const LEX_CSTRING &header, uint length, - CHARSET_INFO *cs= &my_charset_utf8_general_ci) + CHARSET_INFO *cs= &my_charset_utf8mb3_general_ci) :Item_partition_func_safe_string(thd, header, length * cs->mbmaxlen, cs) { } Item_empty_string(THD *thd, const char *header, uint length, - CHARSET_INFO *cs= &my_charset_utf8_general_ci) + CHARSET_INFO *cs= &my_charset_utf8mb3_general_ci) :Item_partition_func_safe_string(thd, LEX_CSTRING({header, strlen(header)}), length * cs->mbmaxlen, cs) { } diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 2af7ebdf231..f9cd467b0f7 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -2809,8 +2809,8 @@ public: Regexp_processor_pcre() : m_pcre(NULL), m_conversion_is_needed(true), m_is_const(0), m_library_flags(0), - m_data_charset(&my_charset_utf8_general_ci), - m_library_charset(&my_charset_utf8_general_ci) + m_data_charset(&my_charset_utf8mb3_general_ci), + m_library_charset(&my_charset_utf8mb3_general_ci) { m_pcre_extra.flags= PCRE_EXTRA_MATCH_LIMIT_RECURSION; m_pcre_extra.match_limit_recursion= 100L; @@ -2827,7 +2827,7 @@ public: // Convert text data to utf-8. m_library_charset= data_charset == &my_charset_bin ? - &my_charset_bin : &my_charset_utf8_general_ci; + &my_charset_bin : &my_charset_utf8mb3_general_ci; m_conversion_is_needed= (data_charset != &my_charset_bin) && !my_charset_same(data_charset, m_library_charset); diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index 14bffc998e3..816f1e8d723 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -595,7 +595,7 @@ String *Item_func_json_quote::val_str(String *str) bool Item_func_json_unquote::fix_length_and_dec() { - collation.set(&my_charset_utf8_general_ci, + collation.set(&my_charset_utf8mb3_general_ci, DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII); max_length= args[0]->max_length; maybe_null= 1; @@ -640,12 +640,12 @@ String *Item_func_json_unquote::val_str(String *str) return js; str->length(0); - str->set_charset(&my_charset_utf8_general_ci); + str->set_charset(&my_charset_utf8mb3_general_ci); if (str->realloc_with_extra_if_needed(je.value_len) || (c_len= json_unescape(js->charset(), je.value, je.value + je.value_len, - &my_charset_utf8_general_ci, + &my_charset_utf8mb3_general_ci, (uchar *) str->ptr(), (uchar *) (str->ptr() + je.value_len))) < 0) goto error; @@ -675,7 +675,7 @@ static int alloc_tmp_paths(THD *thd, uint n_paths, return 1; for (uint c_path=0; c_path < n_paths; c_path++) - (*tmp_paths)[c_path].set_charset(&my_charset_utf8_general_ci); + (*tmp_paths)[c_path].set_charset(&my_charset_utf8mb3_general_ci); } return 0; @@ -2614,7 +2614,7 @@ longlong Item_func_json_depth::val_int() bool Item_func_json_type::fix_length_and_dec() { - collation.set(&my_charset_utf8_general_ci); + collation.set(&my_charset_utf8mb3_general_ci); max_length= 12; maybe_null= 1; return FALSE; @@ -2660,7 +2660,7 @@ String *Item_func_json_type::val_str(String *str) break; } - str->set(type, strlen(type), &my_charset_utf8_general_ci); + str->set(type, strlen(type), &my_charset_utf8mb3_general_ci); return str; error: @@ -3562,7 +3562,7 @@ int Arg_comparator::compare_json_str_basic(Item *j, Item *s) if (value2.realloc_with_extra_if_needed(je.value_len) || (c_len= json_unescape(js->charset(), je.value, je.value + je.value_len, - &my_charset_utf8_general_ci, + &my_charset_utf8mb3_general_ci, (uchar *) value2.ptr(), (uchar *) (value2.ptr() + je.value_len))) < 0) goto error; @@ -3611,7 +3611,7 @@ int Arg_comparator::compare_e_json_str_basic(Item *j, Item *s) if (value1.realloc_with_extra_if_needed(value_len) || (c_len= json_unescape(value1.charset(), (uchar *) value, (uchar *) value+value_len, - &my_charset_utf8_general_ci, + &my_charset_utf8mb3_general_ci, (uchar *) value1.ptr(), (uchar *) (value1.ptr() + value_len))) < 0) return 1; diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index 2c527154108..de0c5bd520b 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -986,7 +986,7 @@ String* Item_func_monthname::val_str(String* str) return (String *) 0; month_name= locale->month_names->type_names[d.get_mysql_time()->month - 1]; - str->copy(month_name, (uint) strlen(month_name), &my_charset_utf8_bin, + str->copy(month_name, (uint) strlen(month_name), &my_charset_utf8mb3_bin, collation.collation, &err); return str; } @@ -1130,7 +1130,7 @@ String* Item_func_dayname::val_str(String* str) return (String*) 0; day_name= locale->day_names->type_names[weekday]; - str->copy(day_name, (uint) strlen(day_name), &my_charset_utf8_bin, + str->copy(day_name, (uint) strlen(day_name), &my_charset_utf8mb3_bin, collation.collation, &err); return str; } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index e00700850a6..506f624f974 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -5369,13 +5369,13 @@ static void test_lc_time_sz() for (const char **month= (*loc)->month_names->type_names; *month; month++) { set_if_bigger(max_month_len, - my_numchars_mb(&my_charset_utf8_general_ci, + my_numchars_mb(&my_charset_utf8mb3_general_ci, *month, *month + strlen(*month))); } for (const char **day= (*loc)->day_names->type_names; *day; day++) { set_if_bigger(max_day_len, - my_numchars_mb(&my_charset_utf8_general_ci, + my_numchars_mb(&my_charset_utf8mb3_general_ci, *day, *day + strlen(*day))); } if ((*loc)->max_month_name_length != max_month_len || @@ -5436,7 +5436,7 @@ int mysqld_main(int argc, char **argv) remaining_argv= argv; /* Must be initialized early for comparison of options name */ - system_charset_info= &my_charset_utf8_general_ci; + system_charset_info= &my_charset_utf8mb3_general_ci; sys_var_init(); @@ -5966,7 +5966,7 @@ int mysqld_main(int argc, char **argv) "MySQLShutdown"), 10); /* Must be initialized early for comparison of service name */ - system_charset_info= &my_charset_utf8_general_ci; + system_charset_info= &my_charset_utf8mb3_general_ci; if (my_init()) { @@ -7891,9 +7891,9 @@ static int mysql_init_variables(void) key_map_full.set_all(); /* Character sets */ - system_charset_info= &my_charset_utf8_general_ci; - files_charset_info= &my_charset_utf8_general_ci; - national_charset_info= &my_charset_utf8_general_ci; + system_charset_info= &my_charset_utf8mb3_general_ci; + files_charset_info= &my_charset_utf8mb3_general_ci; + national_charset_info= &my_charset_utf8mb3_general_ci; table_alias_charset= &my_charset_bin; character_set_filesystem= &my_charset_bin; diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 0fb357fd105..11639666c10 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -2244,7 +2244,7 @@ bool acl_init(bool dont_read_acl_tables) acl_cache= new Hash_filo<acl_entry>(ACL_CACHE_SIZE, 0, 0, (my_hash_get_key) acl_entry_get_key, (my_hash_free_key) free, - &my_charset_utf8_bin); + &my_charset_utf8mb3_bin); /* cache built-in native authentication plugins, @@ -2681,10 +2681,10 @@ bool acl_reload(THD *thd) my_init_dynamic_array(&acl_users, sizeof(ACL_USER), 50, 100, MYF(0)); acl_dbs.init(50, 100); my_init_dynamic_array(&acl_proxy_users, sizeof(ACL_PROXY_USER), 50, 100, MYF(0)); - my_hash_init2(&acl_roles,50, &my_charset_utf8_bin, + my_hash_init2(&acl_roles,50, &my_charset_utf8mb3_bin, 0, 0, 0, (my_hash_get_key) acl_role_get_key, 0, (void (*)(void *))free_acl_role, 0); - my_hash_init2(&acl_roles_mappings, 50, &my_charset_utf8_bin, 0, 0, 0, + my_hash_init2(&acl_roles_mappings, 50, &my_charset_utf8mb3_bin, 0, 0, 0, (my_hash_get_key) acl_role_map_get_key, 0, 0, 0); old_mem= acl_memroot; delete_dynamic(&acl_wild_hosts); @@ -7572,16 +7572,16 @@ static bool grant_load(THD *thd, Sql_mode_instant_remove sms(thd, MODE_PAD_CHAR_TO_FULL_LENGTH); - (void) my_hash_init(&column_priv_hash, &my_charset_utf8_bin, + (void) my_hash_init(&column_priv_hash, &my_charset_utf8mb3_bin, 0,0,0, (my_hash_get_key) get_grant_table, (my_hash_free_key) free_grant_table,0); - (void) my_hash_init(&proc_priv_hash, &my_charset_utf8_bin, + (void) my_hash_init(&proc_priv_hash, &my_charset_utf8mb3_bin, 0,0,0, (my_hash_get_key) get_grant_table, 0,0); - (void) my_hash_init(&func_priv_hash, &my_charset_utf8_bin, + (void) my_hash_init(&func_priv_hash, &my_charset_utf8mb3_bin, 0,0,0, (my_hash_get_key) get_grant_table, 0,0); - (void) my_hash_init(&package_spec_priv_hash, &my_charset_utf8_bin, + (void) my_hash_init(&package_spec_priv_hash, &my_charset_utf8mb3_bin, 0,0,0, (my_hash_get_key) get_grant_table, 0,0); - (void) my_hash_init(&package_body_priv_hash, &my_charset_utf8_bin, + (void) my_hash_init(&package_body_priv_hash, &my_charset_utf8mb3_bin, 0,0,0, (my_hash_get_key) get_grant_table, 0,0); init_sql_alloc(&grant_memroot, "GRANT", ACL_ALLOC_BLOCK_SIZE, 0, MYF(0)); @@ -11305,7 +11305,7 @@ bool sp_revoke_privileges(THD *thd, const char *sp_db, const char *sp_name, for (counter= 0, revoked= 0 ; counter < hash->records ; ) { GRANT_NAME *grant_proc= (GRANT_NAME*) my_hash_element(hash, counter); - if (!my_strcasecmp(&my_charset_utf8_bin, grant_proc->db, sp_db) && + if (!my_strcasecmp(&my_charset_utf8mb3_bin, grant_proc->db, sp_db) && !my_strcasecmp(system_charset_info, grant_proc->tname, sp_name)) { LEX_USER lex_user; diff --git a/sql/sql_class.h b/sql/sql_class.h index 63c53c3332e..494de4b6959 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -6743,8 +6743,8 @@ public: bool eq(const Database_qualified_name *other) const { CHARSET_INFO *cs= lower_case_table_names ? - &my_charset_utf8_general_ci : - &my_charset_utf8_bin; + &my_charset_utf8mb3_general_ci : + &my_charset_utf8mb3_bin; return m_db.length == other->m_db.length && m_name.length == other->m_name.length && diff --git a/sql/sql_digest.cc b/sql/sql_digest.cc index 10a9547d80f..5ca855c9608 100644 --- a/sql/sql_digest.cc +++ b/sql/sql_digest.cc @@ -188,7 +188,7 @@ void compute_digest_text(const sql_digest_storage* digest_storage, /* Convert text to utf8 */ const CHARSET_INFO *from_cs= get_charset(digest_storage->m_charset_number, MYF(0)); - const CHARSET_INFO *to_cs= &my_charset_utf8_bin; + const CHARSET_INFO *to_cs= &my_charset_utf8mb3_bin; if (from_cs == NULL) { diff --git a/sql/sql_error.cc b/sql/sql_error.cc index a11a0f454a2..9f541c775fa 100644 --- a/sql/sql_error.cc +++ b/sql/sql_error.cc @@ -744,7 +744,7 @@ void push_warning_printf(THD *thd, Sql_condition::enum_warning_level level, DBUG_ASSERT(format != NULL); va_start(args,format); - my_vsnprintf_ex(&my_charset_utf8_general_ci, warning, + my_vsnprintf_ex(&my_charset_utf8mb3_general_ci, warning, sizeof(warning), format, args); va_end(args); push_warning(thd, level, code, warning); diff --git a/sql/sql_error.h b/sql/sql_error.h index bb83d8af800..5055c1a61cf 100644 --- a/sql/sql_error.h +++ b/sql/sql_error.h @@ -307,16 +307,16 @@ protected: String m_cursor_name; Sql_condition_items() - :m_class_origin((const char*) NULL, 0, & my_charset_utf8_bin), - m_subclass_origin((const char*) NULL, 0, & my_charset_utf8_bin), - m_constraint_catalog((const char*) NULL, 0, & my_charset_utf8_bin), - m_constraint_schema((const char*) NULL, 0, & my_charset_utf8_bin), - m_constraint_name((const char*) NULL, 0, & my_charset_utf8_bin), - m_catalog_name((const char*) NULL, 0, & my_charset_utf8_bin), - m_schema_name((const char*) NULL, 0, & my_charset_utf8_bin), - m_table_name((const char*) NULL, 0, & my_charset_utf8_bin), - m_column_name((const char*) NULL, 0, & my_charset_utf8_bin), - m_cursor_name((const char*) NULL, 0, & my_charset_utf8_bin) + :m_class_origin((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_subclass_origin((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_constraint_catalog((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_constraint_schema((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_constraint_name((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_catalog_name((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_schema_name((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_table_name((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_column_name((const char*) NULL, 0, & my_charset_utf8mb3_bin), + m_cursor_name((const char*) NULL, 0, & my_charset_utf8mb3_bin) { } void clear() diff --git a/sql/sql_get_diagnostics.cc b/sql/sql_get_diagnostics.cc index b3ae423b914..197bf5e7a00 100644 --- a/sql/sql_get_diagnostics.cc +++ b/sql/sql_get_diagnostics.cc @@ -266,8 +266,8 @@ Condition_information::aggregate(THD *thd, const Diagnostics_area *da) Item * Condition_information_item::make_utf8_string_item(THD *thd, const String *str) { - /* Default is utf8 character set and utf8_general_ci collation. */ - CHARSET_INFO *to_cs= &my_charset_utf8_general_ci; + /* Default is utf8 character set and utf8mb3_general_ci collation. */ + CHARSET_INFO *to_cs= &my_charset_utf8mb3_general_ci; /* If a charset was not set, assume that no conversion is needed. */ CHARSET_INFO *from_cs= str->charset() ? str->charset() : to_cs; String tmp(str->ptr(), str->length(), from_cs); diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index a2cfc0d0128..e0801e9f43a 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -349,7 +349,7 @@ size_t Lex_input_stream::get_body_utf8_maximum_length(THD *thd) "2" should be a reasonable multiplier that safely covers escaping needs. */ return (m_buf_length / thd->variables.character_set_client->mbminlen) * - my_charset_utf8_bin.mbmaxlen * 2/*for escaping*/; + my_charset_utf8mb3_bin.mbmaxlen * 2/*for escaping*/; } @@ -454,14 +454,14 @@ extern "C" { @param end - the end of the destination string @returns - a code according to the wc_mb() convension. */ -int my_wc_mb_utf8_with_escape(CHARSET_INFO *cs, my_wc_t escape, my_wc_t wc, - uchar *str, uchar *end) +int my_wc_mb_utf8mb3_with_escape(CHARSET_INFO *cs, my_wc_t escape, my_wc_t wc, + uchar *str, uchar *end) { DBUG_ASSERT(escape > 0); if (str + 1 >= end) return MY_CS_TOOSMALL2; // Not enough space, need at least two bytes. *str= (uchar)escape; - int cnvres= my_charset_utf8_handler.wc_mb(cs, wc, str + 1, end); + int cnvres= my_charset_utf8mb3_handler.wc_mb(cs, wc, str + 1, end); if (cnvres > 0) return cnvres + 1; // The character was normally put if (cnvres == MY_CS_ILUNI) @@ -483,12 +483,12 @@ int my_wc_mb_utf8_with_escape(CHARSET_INFO *cs, my_wc_t escape, my_wc_t wc, @param end - the end of the destination string @returns - a code according to the wc_mb() conversion. */ -int my_wc_mb_utf8_opt_escape(CHARSET_INFO *cs, - my_wc_t wc, my_wc_t escape, my_wc_t ewc, - uchar *str, uchar *end) +int my_wc_mb_utf8mb3_opt_escape(CHARSET_INFO *cs, + my_wc_t wc, my_wc_t escape, my_wc_t ewc, + uchar *str, uchar *end) { - return escape ? my_wc_mb_utf8_with_escape(cs, escape, ewc, str, end) : - my_charset_utf8_handler.wc_mb(cs, wc, str, end); + return escape ? my_wc_mb_utf8mb3_with_escape(cs, escape, ewc, str, end) : + my_charset_utf8mb3_handler.wc_mb(cs, wc, str, end); } /** @@ -507,54 +507,55 @@ int my_wc_mb_utf8_opt_escape(CHARSET_INFO *cs, @param escape - the escape character (backslash, or 0) @returns - a code according to the wc_mb() convension. */ -int my_wc_mb_utf8_escape(CHARSET_INFO *cs, my_wc_t wc, uchar *str, uchar *end, - my_wc_t sep, my_wc_t escape) +int my_wc_mb_utf8mb3_escape(CHARSET_INFO *cs, my_wc_t wc, + uchar *str, uchar *end, + my_wc_t sep, my_wc_t escape) { DBUG_ASSERT(escape == 0 || escape == '\\'); DBUG_ASSERT(sep == '"' || sep == '\''); switch (wc) { - case 0: return my_wc_mb_utf8_opt_escape(cs, wc, escape, '0', str, end); - case '\t': return my_wc_mb_utf8_opt_escape(cs, wc, escape, 't', str, end); - case '\r': return my_wc_mb_utf8_opt_escape(cs, wc, escape, 'r', str, end); - case '\n': return my_wc_mb_utf8_opt_escape(cs, wc, escape, 'n', str, end); - case '\032': return my_wc_mb_utf8_opt_escape(cs, wc, escape, 'Z', str, end); + case 0: return my_wc_mb_utf8mb3_opt_escape(cs, wc, escape, '0', str, end); + case '\t': return my_wc_mb_utf8mb3_opt_escape(cs, wc, escape, 't', str, end); + case '\r': return my_wc_mb_utf8mb3_opt_escape(cs, wc, escape, 'r', str, end); + case '\n': return my_wc_mb_utf8mb3_opt_escape(cs, wc, escape, 'n', str, end); + case '\032': return my_wc_mb_utf8mb3_opt_escape(cs, wc, escape, 'Z', str, end); case '\'': case '\"': if (wc == sep) - return my_wc_mb_utf8_with_escape(cs, wc, wc, str, end); + return my_wc_mb_utf8mb3_with_escape(cs, wc, wc, str, end); } - return my_charset_utf8_handler.wc_mb(cs, wc, str, end); // No escaping needed + return my_charset_utf8mb3_handler.wc_mb(cs, wc, str, end); // No escaping needed } /** wc_mb() compatible routines for all sql_mode and delimiter combinations */ -int my_wc_mb_utf8_escape_single_quote_and_backslash(CHARSET_INFO *cs, +int my_wc_mb_utf8mb3_escape_single_quote_and_backslash(CHARSET_INFO *cs, my_wc_t wc, uchar *str, uchar *end) { - return my_wc_mb_utf8_escape(cs, wc, str, end, '\'', '\\'); + return my_wc_mb_utf8mb3_escape(cs, wc, str, end, '\'', '\\'); } -int my_wc_mb_utf8_escape_double_quote_and_backslash(CHARSET_INFO *cs, +int my_wc_mb_utf8mb3_escape_double_quote_and_backslash(CHARSET_INFO *cs, my_wc_t wc, uchar *str, uchar *end) { - return my_wc_mb_utf8_escape(cs, wc, str, end, '"', '\\'); + return my_wc_mb_utf8mb3_escape(cs, wc, str, end, '"', '\\'); } -int my_wc_mb_utf8_escape_single_quote(CHARSET_INFO *cs, my_wc_t wc, +int my_wc_mb_utf8mb3_escape_single_quote(CHARSET_INFO *cs, my_wc_t wc, uchar *str, uchar *end) { - return my_wc_mb_utf8_escape(cs, wc, str, end, '\'', 0); + return my_wc_mb_utf8mb3_escape(cs, wc, str, end, '\'', 0); } -int my_wc_mb_utf8_escape_double_quote(CHARSET_INFO *cs, my_wc_t wc, +int my_wc_mb_utf8mb3_escape_double_quote(CHARSET_INFO *cs, my_wc_t wc, uchar *str, uchar *end) { - return my_wc_mb_utf8_escape(cs, wc, str, end, '"', 0); + return my_wc_mb_utf8mb3_escape(cs, wc, str, end, '"', 0); } }; // End of extern "C" @@ -568,10 +569,10 @@ my_charset_conv_wc_mb Lex_input_stream::get_escape_func(THD *thd, my_wc_t sep) const { return thd->backslash_escapes() ? - (sep == '"' ? my_wc_mb_utf8_escape_double_quote_and_backslash: - my_wc_mb_utf8_escape_single_quote_and_backslash) : - (sep == '"' ? my_wc_mb_utf8_escape_double_quote: - my_wc_mb_utf8_escape_single_quote); + (sep == '"' ? my_wc_mb_utf8mb3_escape_double_quote_and_backslash: + my_wc_mb_utf8mb3_escape_single_quote_and_backslash) : + (sep == '"' ? my_wc_mb_utf8mb3_escape_double_quote: + my_wc_mb_utf8mb3_escape_single_quote); } @@ -611,7 +612,7 @@ void Lex_input_stream::body_utf8_append_escape(THD *thd, DBUG_ASSERT(m_body_utf8 + get_body_utf8_maximum_length(thd) >= m_body_utf8_ptr + txt->length * 2); uint32 cnv_length= my_convert_using_func(m_body_utf8_ptr, txt->length * 2, - &my_charset_utf8_general_ci, + &my_charset_utf8mb3_general_ci, get_escape_func(thd, sep), txt->str, txt->length, cs, cs->cset->mb_wc, diff --git a/sql/sql_signal.cc b/sql/sql_signal.cc index 320a954711a..1b1d2fe0f31 100644 --- a/sql/sql_signal.cc +++ b/sql/sql_signal.cc @@ -151,7 +151,7 @@ static int assign_condition_item(MEM_ROOT *mem_root, const char* name, THD *thd, Item *set, String *ci) { char str_buff[(64+1)*4]; /* Room for a null terminated UTF8 String 64 */ - String str_value(str_buff, sizeof(str_buff), & my_charset_utf8_bin); + String str_value(str_buff, sizeof(str_buff), & my_charset_utf8mb3_bin); String *str; bool truncated; @@ -164,7 +164,7 @@ static int assign_condition_item(MEM_ROOT *mem_root, const char* name, THD *thd, } str= set->val_str(& str_value); - truncated= assign_fixed_string(mem_root, & my_charset_utf8_bin, 64, ci, str); + truncated= assign_fixed_string(mem_root, & my_charset_utf8mb3_bin, 64, ci, str); if (truncated) { if (thd->is_strict_mode()) @@ -260,7 +260,7 @@ int Sql_cmd_common_signal::eval_signal_informations(THD *thd, Sql_condition *con bool truncated; String utf8_text; str= set->val_str(& str_value); - truncated= assign_fixed_string(thd->mem_root, & my_charset_utf8_bin, + truncated= assign_fixed_string(thd->mem_root, & my_charset_utf8mb3_bin, MYSQL_ERRMSG_SIZE, & utf8_text, str); if (truncated) diff --git a/sql/sql_type.cc b/sql/sql_type.cc index 14f368af069..a71874819fa 100644 --- a/sql/sql_type.cc +++ b/sql/sql_type.cc @@ -3916,7 +3916,7 @@ bool Type_handler_string_result:: the query should return only the row with 'oe'. It should not return 'o-umlaut', because 'o-umlaut' does not match the right part of the condition: a='oe' - ('o-umlaut' is not equal to 'oe' in utf8_general_ci, + ('o-umlaut' is not equal to 'oe' in utf8mb3_general_ci, which is the collation of the field "a"). If we change the right part from: diff --git a/sql/xa.cc b/sql/xa.cc index c4b983aa4f5..e30c3bfadba 100644 --- a/sql/xa.cc +++ b/sql/xa.cc @@ -814,7 +814,7 @@ static my_bool xa_recover_callback_verbose(XID_cache_element *xs, char buf[SQL_XIDSIZE]; uint len= get_sql_xid(&xs->xid, buf); return xa_recover_callback(xs, protocol, buf, len, - &my_charset_utf8_general_ci); + &my_charset_utf8mb3_general_ci); } @@ -842,7 +842,7 @@ bool mysql_xa_recover(THD *thd) if (thd->lex->verbose) { len= SQL_XIDSIZE; - cs= &my_charset_utf8_general_ci; + cs= &my_charset_utf8mb3_general_ci; action= (my_hash_walk_action) xa_recover_callback_verbose; } else diff --git a/storage/cassandra/ha_cassandra.cc b/storage/cassandra/ha_cassandra.cc index fbce6b61f17..e0127ba752a 100644 --- a/storage/cassandra/ha_cassandra.cc +++ b/storage/cassandra/ha_cassandra.cc @@ -1104,7 +1104,7 @@ bool cassandra_to_dyncol_strUTF8(const char *cass_data, MEM_ROOT *mem_root __attribute__((unused))) { return cassandra_to_dyncol_strStr(cass_data, cass_data_len, value, - &my_charset_utf8_unicode_ci); + &my_charset_utf8mb3_unicode_ci); } bool dyncol_to_cassandraUTF8(DYNAMIC_COLUMN_VALUE *value, @@ -1112,7 +1112,7 @@ bool dyncol_to_cassandraUTF8(DYNAMIC_COLUMN_VALUE *value, void* buff, void **freemem) { return dyncol_to_cassandraStr(value, cass_data, cass_data_len, - buff, freemem, &my_charset_utf8_unicode_ci); + buff, freemem, &my_charset_utf8mb3_unicode_ci); } bool cassandra_to_dyncol_strUUID(const char *cass_data, diff --git a/storage/connect/ha_connect.cc b/storage/connect/ha_connect.cc index 7aff7796490..2474cb05b62 100644 --- a/storage/connect/ha_connect.cc +++ b/storage/connect/ha_connect.cc @@ -5294,7 +5294,7 @@ static char *encode(PGLOBAL g, const char *cnm) char *buf= (char*)PlugSubAlloc(g, NULL, strlen(cnm) * 3); uint dummy_errors; uint32 len= copy_and_convert(buf, strlen(cnm) * 3, - &my_charset_utf8_general_ci, + &my_charset_utf8mb3_general_ci, cnm, strlen(cnm), &my_charset_latin1, &dummy_errors); @@ -6331,7 +6331,7 @@ int ha_connect::create(const char *name, TABLE *table_arg, DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } // endif charset - if (type == TAB_XML && data_charset != &my_charset_utf8_general_ci) { + if (type == TAB_XML && data_charset != &my_charset_utf8mb3_general_ci) { my_printf_error(ER_UNKNOWN_ERROR, "DATA_CHARSET='%s' is not supported for TABLE_TYPE=XML", MYF(0), options->data_charset); diff --git a/storage/connect/libdoc.cpp b/storage/connect/libdoc.cpp index 9b30b315441..a0a18c84817 100644 --- a/storage/connect/libdoc.cpp +++ b/storage/connect/libdoc.cpp @@ -765,8 +765,8 @@ int LIBXMLDOC::Decode(xmlChar *cnt, char *buf, int n) { const char *txt = (const char *)cnt; uint dummy_errors; - uint32 len= copy_and_convert(buf, n, &my_charset_utf8_general_ci, txt, - strlen(txt), &my_charset_utf8_general_ci, + uint32 len= copy_and_convert(buf, n, &my_charset_utf8mb3_general_ci, txt, + strlen(txt), &my_charset_utf8mb3_general_ci, &dummy_errors); buf[len]= '\0'; return 0; @@ -777,8 +777,8 @@ int LIBXMLDOC::Decode(xmlChar *cnt, char *buf, int n) /******************************************************************/ xmlChar *LIBXMLDOC::Encode(PGLOBAL g, char *txt) { - const CHARSET_INFO *ics= &my_charset_utf8_general_ci; - const CHARSET_INFO *ocs= &my_charset_utf8_general_ci; + const CHARSET_INFO *ics= &my_charset_utf8mb3_general_ci; + const CHARSET_INFO *ocs= &my_charset_utf8mb3_general_ci; size_t i = strlen(txt); size_t o = i * ocs->mbmaxlen / ics->mbmaxlen + 1; char *buf; diff --git a/storage/connect/tabext.cpp b/storage/connect/tabext.cpp index aaf14f123c6..5f803aa1269 100644 --- a/storage/connect/tabext.cpp +++ b/storage/connect/tabext.cpp @@ -278,7 +278,7 @@ int TDBEXT::Decode(PCSZ txt, char *buf, size_t n) uint dummy_errors; uint32 len = copy_and_convert(buf, n, &my_charset_latin1, txt, strlen(txt), - &my_charset_utf8_general_ci, + &my_charset_utf8mb3_general_ci, &dummy_errors); buf[len] = '\0'; return 0; diff --git a/storage/connect/tabutil.cpp b/storage/connect/tabutil.cpp index 325f36b1e19..b82e98204dd 100644 --- a/storage/connect/tabutil.cpp +++ b/storage/connect/tabutil.cpp @@ -674,7 +674,7 @@ char *PRXCOL::Decode(PGLOBAL g, const char *cnm) uint32 len= copy_and_convert(buf, strlen(cnm) + 1, &my_charset_latin1, cnm, strlen(cnm), - &my_charset_utf8_general_ci, + &my_charset_utf8mb3_general_ci, &dummy_errors); buf[len]= '\0'; return buf; diff --git a/storage/connect/tabxml.cpp b/storage/connect/tabxml.cpp index 19490d350e8..6ea01285a61 100644 --- a/storage/connect/tabxml.cpp +++ b/storage/connect/tabxml.cpp @@ -694,7 +694,7 @@ PTDB TDBXML::Clone(PTABS t) /***********************************************************************/ const CHARSET_INFO *TDBXML::data_charset() { - return &my_charset_utf8_general_ci; + return &my_charset_utf8mb3_general_ci; } // end of data_charset /***********************************************************************/ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 23e860f9da2..fce893cb68c 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -9770,7 +9770,7 @@ ha_innobase::ft_init_ext( buf_tmp_used = innobase_convert_string( buf_tmp, sizeof(buf_tmp) - 1, - &my_charset_utf8_general_ci, + &my_charset_utf8mb3_general_ci, query, query_len, (CHARSET_INFO*) char_set, &num_errors); diff --git a/storage/perfschema/pfs_defaults.cc b/storage/perfschema/pfs_defaults.cc index 4171786ea68..194877e0e38 100644 --- a/storage/perfschema/pfs_defaults.cc +++ b/storage/perfschema/pfs_defaults.cc @@ -42,17 +42,17 @@ void install_default_setup(PSI_bootstrap *boot) /* LF_HASH needs a thread, for PINS */ psi->set_thread(psi_thread); - String percent("%", 1, &my_charset_utf8_bin); + String percent("%", 1, &my_charset_utf8mb3_bin); /* Enable all users on all hosts by default */ insert_setup_actor(&percent, &percent, &percent); /* Disable system tables by default */ - String mysql_db("mysql", 5, &my_charset_utf8_bin); + String mysql_db("mysql", 5, &my_charset_utf8mb3_bin); insert_setup_object(OBJECT_TYPE_TABLE, &mysql_db, &percent, false, false); /* Disable performance/information schema tables. */ - String PS_db("performance_schema", 18, &my_charset_utf8_bin); - String IS_db("information_schema", 18, &my_charset_utf8_bin); + String PS_db("performance_schema", 18, &my_charset_utf8mb3_bin); + String IS_db("information_schema", 18, &my_charset_utf8mb3_bin); insert_setup_object(OBJECT_TYPE_TABLE, &PS_db, &percent, false, false); insert_setup_object(OBJECT_TYPE_TABLE, &IS_db, &percent, false, false); diff --git a/storage/perfschema/pfs_engine_table.cc b/storage/perfschema/pfs_engine_table.cc index b133988e908..75ba41e9dd4 100644 --- a/storage/perfschema/pfs_engine_table.cc +++ b/storage/perfschema/pfs_engine_table.cc @@ -368,7 +368,7 @@ void PFS_engine_table::set_field_char_utf8(Field *f, const char* str, { DBUG_ASSERT(f->real_type() == MYSQL_TYPE_STRING); Field_string *f2= (Field_string*) f; - f2->store(str, len, &my_charset_utf8_bin); + f2->store(str, len, &my_charset_utf8mb3_bin); } void PFS_engine_table::set_field_varchar_utf8(Field *f, const char* str, @@ -376,7 +376,7 @@ void PFS_engine_table::set_field_varchar_utf8(Field *f, const char* str, { DBUG_ASSERT(f->real_type() == MYSQL_TYPE_VARCHAR); Field_varstring *f2= (Field_varstring*) f; - f2->store(str, len, &my_charset_utf8_bin); + f2->store(str, len, &my_charset_utf8mb3_bin); } void PFS_engine_table::set_field_longtext_utf8(Field *f, const char* str, @@ -384,7 +384,7 @@ void PFS_engine_table::set_field_longtext_utf8(Field *f, const char* str, { DBUG_ASSERT(f->real_type() == MYSQL_TYPE_BLOB); Field_blob *f2= (Field_blob*) f; - f2->store(str, len, &my_charset_utf8_bin); + f2->store(str, len, &my_charset_utf8mb3_bin); } void PFS_engine_table::set_field_enum(Field *f, ulonglong value) diff --git a/storage/perfschema/table_session_connect.cc b/storage/perfschema/table_session_connect.cc index 74b953b034e..b296726ef79 100644 --- a/storage/perfschema/table_session_connect.cc +++ b/storage/perfschema/table_session_connect.cc @@ -73,7 +73,7 @@ bool parse_length_encoded_string(const char **ptr, if (*ptr - start_ptr + data_length > input_length) return true; - copy_length= copier.well_formed_copy(&my_charset_utf8_bin, dest, dest_size, + copy_length= copier.well_formed_copy(&my_charset_utf8mb3_bin, dest, dest_size, from_cs, *ptr, data_length, nchars_max); *copied_len= copy_length; (*ptr)+= data_length; diff --git a/storage/perfschema/table_setup_actors.cc b/storage/perfschema/table_setup_actors.cc index b5058f8cd85..e7c20341e94 100644 --- a/storage/perfschema/table_setup_actors.cc +++ b/storage/perfschema/table_setup_actors.cc @@ -56,9 +56,9 @@ int table_setup_actors::write_row(TABLE *table, unsigned char *buf, Field **fields) { Field *f; - String user_data("%", 1, &my_charset_utf8_bin); - String host_data("%", 1, &my_charset_utf8_bin); - String role_data("%", 1, &my_charset_utf8_bin); + String user_data("%", 1, &my_charset_utf8mb3_bin); + String host_data("%", 1, &my_charset_utf8mb3_bin); + String role_data("%", 1, &my_charset_utf8mb3_bin); String *user= &user_data; String *host= &host_data; String *role= &role_data; @@ -248,7 +248,7 @@ int table_setup_actors::delete_row_values(TABLE *table, { DBUG_ASSERT(m_row_exists); - CHARSET_INFO *cs= &my_charset_utf8_bin; + CHARSET_INFO *cs= &my_charset_utf8mb3_bin; String user(m_row.m_username, m_row.m_username_length, cs); String role(m_row.m_rolename, m_row.m_rolename_length, cs); String host(m_row.m_hostname, m_row.m_hostname_length, cs); diff --git a/storage/perfschema/table_setup_objects.cc b/storage/perfschema/table_setup_objects.cc index e15c55b41ae..98de9b22416 100644 --- a/storage/perfschema/table_setup_objects.cc +++ b/storage/perfschema/table_setup_objects.cc @@ -72,8 +72,8 @@ int table_setup_objects::write_row(TABLE *table, unsigned char *buf, int result; Field *f; enum_object_type object_type= OBJECT_TYPE_TABLE; - String object_schema_data("%", 1, &my_charset_utf8_bin); - String object_name_data("%", 1, &my_charset_utf8_bin); + String object_schema_data("%", 1, &my_charset_utf8mb3_bin); + String object_name_data("%", 1, &my_charset_utf8mb3_bin); String *object_schema= &object_schema_data; String *object_name= &object_name_data; enum_yes_no enabled_value= ENUM_YES; @@ -312,7 +312,7 @@ int table_setup_objects::delete_row_values(TABLE *table, { DBUG_ASSERT(m_row_exists); - CHARSET_INFO *cs= &my_charset_utf8_bin; + CHARSET_INFO *cs= &my_charset_utf8mb3_bin; enum_object_type object_type= OBJECT_TYPE_TABLE; String object_schema(m_row.m_schema_name, m_row.m_schema_name_length, cs); String object_name(m_row.m_object_name, m_row.m_object_name_length, cs); diff --git a/storage/perfschema/unittest/pfs_connect_attr-t.cc b/storage/perfschema/unittest/pfs_connect_attr-t.cc index 3dd62ca5662..c0adabc18bc 100644 --- a/storage/perfschema/unittest/pfs_connect_attr-t.cc +++ b/storage/perfschema/unittest/pfs_connect_attr-t.cc @@ -40,7 +40,7 @@ void test_blob_parser() unsigned char packet[10000], *ptr; uint name_len, value_len, idx, packet_length; bool result; - const CHARSET_INFO *cs= &my_charset_utf8_bin; + const CHARSET_INFO *cs= &my_charset_utf8mb3_bin; diag("test_blob_parser"); @@ -157,7 +157,7 @@ void test_multibyte_lengths() char name[100], value[4096]; uint name_len, value_len; bool result; - const CHARSET_INFO *cs= &my_charset_utf8_bin; + const CHARSET_INFO *cs= &my_charset_utf8mb3_bin; unsigned char var_len_packet[] = { 252, 2, 0, 'k', '1', @@ -190,7 +190,7 @@ void test_utf8_parser() char name[33 * 6], value[1024 * 6], packet[1500 * 6], *ptr; uint name_len, value_len; bool result; - const CHARSET_INFO *cs= &my_charset_utf8_bin; + const CHARSET_INFO *cs= &my_charset_utf8mb3_bin; /* note : this is encoded in utf-8 */ const char *attr1= "Георги"; @@ -242,7 +242,7 @@ void test_utf8_parser_bad_encoding() char name[33 * 3], value[1024 * 3], packet[1500 * 3], *ptr; uint name_len, value_len; bool result; - const CHARSET_INFO *cs= &my_charset_utf8_bin; + const CHARSET_INFO *cs= &my_charset_utf8mb3_bin; /* note : this is encoded in utf-8 */ const char *attr= "Георги"; diff --git a/storage/spider/spd_db_mysql.cc b/storage/spider/spd_db_mysql.cc index 58351195a61..3b89f5e64f2 100644 --- a/storage/spider/spd_db_mysql.cc +++ b/storage/spider/spd_db_mysql.cc @@ -47,7 +47,7 @@ #include "spd_sys_table.h" #include "spd_table.h" -extern struct charset_info_st *spd_charset_utf8_bin; +extern struct charset_info_st *spd_charset_utf8mb3_bin; extern bool volatile *spd_abort_loop; extern handlerton *spider_hton_ptr; @@ -1857,7 +1857,7 @@ int spider_db_mbase::init() DBUG_ENTER("spider_db_mbase::init"); DBUG_PRINT("info",("spider this=%p", this)); if ( - my_hash_init(&lock_table_hash, spd_charset_utf8_bin, 32, 0, 0, + my_hash_init(&lock_table_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_link_get_key, 0, 0) ) { DBUG_RETURN(HA_ERR_OUT_OF_MEM); diff --git a/storage/spider/spd_db_oracle.cc b/storage/spider/spd_db_oracle.cc index 773c6b90ed9..8577dbb7f97 100644 --- a/storage/spider/spd_db_oracle.cc +++ b/storage/spider/spd_db_oracle.cc @@ -48,7 +48,7 @@ #include "spd_sys_table.h" #include "spd_table.h" -extern struct charset_info_st *spd_charset_utf8_bin; +extern struct charset_info_st *spd_charset_utf8mb3_bin; extern handlerton *spider_hton_ptr; extern pthread_mutex_t spider_open_conn_mutex; @@ -1148,7 +1148,7 @@ int spider_db_oracle::init() DBUG_ENTER("spider_db_oracle::init"); DBUG_PRINT("info",("spider this=%p", this)); if ( - my_hash_init(&lock_table_hash, spd_charset_utf8_bin, 32, 0, 0, + my_hash_init(&lock_table_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_link_get_key, 0, 0) ) { DBUG_RETURN(HA_ERR_OUT_OF_MEM); diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc index d6aa9356c6d..3c8ec286998 100644 --- a/storage/spider/spd_table.cc +++ b/storage/spider/spd_table.cc @@ -119,7 +119,7 @@ uint *spd_db_att_xid_cache_split_num; pthread_mutex_t *spd_db_att_LOCK_xid_cache; HASH *spd_db_att_xid_cache; #endif -struct charset_info_st *spd_charset_utf8_bin; +struct charset_info_st *spd_charset_utf8mb3_bin; const char **spd_defaults_extra_file; const char **spd_defaults_file; const char **spd_mysqld_unix_port; @@ -6088,7 +6088,7 @@ SPIDER_PARTITION_SHARE *spider_get_pt_share( } if( - my_hash_init(&partition_share->pt_handler_hash, spd_charset_utf8_bin, + my_hash_init(&partition_share->pt_handler_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_pt_handler_share_get_key, 0, 0) ) { *error_num = HA_ERR_OUT_OF_MEM; @@ -6929,8 +6929,8 @@ int spider_db_init( GetProcAddress(current_module, "?xid_cache@@3Ust_hash@@A"); #endif #endif - spd_charset_utf8_bin = (struct charset_info_st *) - GetProcAddress(current_module, "my_charset_utf8_bin"); + spd_charset_utf8mb3_bin = (struct charset_info_st *) + GetProcAddress(current_module, "my_charset_utf8mb3_bin"); spd_defaults_extra_file = (const char **) GetProcAddress(current_module, "my_defaults_extra_file"); spd_defaults_file = (const char **) @@ -6962,7 +6962,7 @@ int spider_db_init( spd_db_att_xid_cache = &xid_cache; #endif #endif - spd_charset_utf8_bin = &my_charset_utf8_bin; + spd_charset_utf8mb3_bin = &my_charset_utf8mb3_bin; spd_defaults_extra_file = &my_defaults_extra_file; spd_defaults_file = &my_defaults_file; spd_mysqld_unix_port = (const char **) &mysqld_unix_port; @@ -7097,7 +7097,7 @@ int spider_db_init( #endif goto error_mem_calc_mutex_init; - if (my_hash_init(&spider_open_tables, spd_charset_utf8_bin, 32, 0, 0, + if (my_hash_init(&spider_open_tables, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_tbl_get_key, 0, 0)) goto error_open_tables_hash_init; @@ -7106,7 +7106,7 @@ int spider_db_init( spider_open_tables, spider_open_tables.array.max_element * spider_open_tables.array.size_of_element); - if (my_hash_init(&spider_init_error_tables, spd_charset_utf8_bin, 32, 0, 0, + if (my_hash_init(&spider_init_error_tables, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_tbl_get_key, 0, 0)) goto error_init_error_tables_hash_init; @@ -7116,7 +7116,7 @@ int spider_db_init( spider_init_error_tables.array.max_element * spider_init_error_tables.array.size_of_element); #ifdef WITH_PARTITION_STORAGE_ENGINE - if (my_hash_init(&spider_open_pt_share, spd_charset_utf8_bin, 32, 0, 0, + if (my_hash_init(&spider_open_pt_share, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_pt_share_get_key, 0, 0)) goto error_open_pt_share_hash_init; @@ -7126,7 +7126,7 @@ int spider_db_init( spider_open_pt_share.array.max_element * spider_open_pt_share.array.size_of_element); #endif - if (my_hash_init(&spider_lgtm_tblhnd_share_hash, spd_charset_utf8_bin, + if (my_hash_init(&spider_lgtm_tblhnd_share_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_lgtm_tblhnd_share_hash_get_key, 0, 0)) @@ -7137,11 +7137,11 @@ int spider_db_init( spider_lgtm_tblhnd_share_hash, spider_lgtm_tblhnd_share_hash.array.max_element * spider_lgtm_tblhnd_share_hash.array.size_of_element); - if (my_hash_init(&spider_open_connections, spd_charset_utf8_bin, 32, 0, 0, + if (my_hash_init(&spider_open_connections, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_conn_get_key, 0, 0)) goto error_open_connections_hash_init; - if (my_hash_init(&spider_ipport_conns, spd_charset_utf8_bin, 32, 0, 0, + if (my_hash_init(&spider_ipport_conns, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_ipport_conn_get_key, spider_free_ipport_conn, 0)) goto error_ipport_conn__hash_init; @@ -7152,7 +7152,7 @@ int spider_db_init( spider_open_connections.array.max_element * spider_open_connections.array.size_of_element); #if defined(HS_HAS_SQLCOM) && defined(HAVE_HANDLERSOCKET) - if (my_hash_init(&spider_hs_r_conn_hash, spd_charset_utf8_bin, 32, 0, 0, + if (my_hash_init(&spider_hs_r_conn_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_conn_get_key, 0, 0)) goto error_hs_r_conn_hash_init; @@ -7161,7 +7161,7 @@ int spider_db_init( spider_hs_r_conn_hash, spider_hs_r_conn_hash.array.max_element * spider_hs_r_conn_hash.array.size_of_element); - if (my_hash_init(&spider_hs_w_conn_hash, spd_charset_utf8_bin, 32, 0, 0, + if (my_hash_init(&spider_hs_w_conn_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_conn_get_key, 0, 0)) goto error_hs_w_conn_hash_init; @@ -7171,7 +7171,7 @@ int spider_db_init( spider_hs_w_conn_hash.array.max_element * spider_hs_w_conn_hash.array.size_of_element); #endif - if (my_hash_init(&spider_allocated_thds, spd_charset_utf8_bin, 32, 0, 0, + if (my_hash_init(&spider_allocated_thds, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_allocated_thds_get_key, 0, 0)) goto error_allocated_thds_hash_init; @@ -7233,7 +7233,7 @@ int spider_db_init( roop_count++) { if (my_hash_init(&spider_udf_table_mon_list_hash[roop_count], - spd_charset_utf8_bin, 32, 0, 0, + spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_udf_tbl_mon_list_key, 0, 0)) goto error_init_udf_table_mon_list_hash; diff --git a/storage/spider/spd_trx.cc b/storage/spider/spd_trx.cc index 876dda19561..91ddb8d354c 100644 --- a/storage/spider/spd_trx.cc +++ b/storage/spider/spd_trx.cc @@ -49,7 +49,7 @@ extern uint *spd_db_att_xid_cache_split_num; extern pthread_mutex_t *spd_db_att_LOCK_xid_cache; extern HASH *spd_db_att_xid_cache; #endif -extern struct charset_info_st *spd_charset_utf8_bin; +extern struct charset_info_st *spd_charset_utf8mb3_bin; extern handlerton *spider_hton_ptr; extern SPIDER_DBTON spider_dbton[SPIDER_DBTON_SIZE]; @@ -1228,7 +1228,7 @@ SPIDER_TRX *spider_get_trx( } if ( - my_hash_init(&trx->trx_conn_hash, spd_charset_utf8_bin, 32, 0, 0, + my_hash_init(&trx->trx_conn_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_conn_get_key, 0, 0) ) goto error_init_hash; @@ -1240,7 +1240,7 @@ SPIDER_TRX *spider_get_trx( trx->trx_conn_hash.array.size_of_element); if ( - my_hash_init(&trx->trx_another_conn_hash, spd_charset_utf8_bin, 32, 0, 0, + my_hash_init(&trx->trx_another_conn_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_conn_get_key, 0, 0) ) goto error_init_another_hash; @@ -1253,7 +1253,7 @@ SPIDER_TRX *spider_get_trx( #if defined(HS_HAS_SQLCOM) && defined(HAVE_HANDLERSOCKET) if ( - my_hash_init(&trx->trx_hs_r_conn_hash, spd_charset_utf8_bin, 32, 0, 0, + my_hash_init(&trx->trx_hs_r_conn_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_conn_get_key, 0, 0) ) goto error_hs_r_init_hash; @@ -1265,7 +1265,7 @@ SPIDER_TRX *spider_get_trx( trx->trx_hs_r_conn_hash.array.size_of_element); if ( - my_hash_init(&trx->trx_hs_w_conn_hash, spd_charset_utf8_bin, 32, 0, 0, + my_hash_init(&trx->trx_hs_w_conn_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_conn_get_key, 0, 0) ) goto error_hs_w_init_hash; @@ -1279,7 +1279,7 @@ SPIDER_TRX *spider_get_trx( #if defined(HS_HAS_SQLCOM) && defined(HAVE_HANDLERSOCKET) if ( - my_hash_init(&trx->trx_direct_hs_r_conn_hash, spd_charset_utf8_bin, 32, + my_hash_init(&trx->trx_direct_hs_r_conn_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_conn_get_key, 0, 0) ) goto error_direct_hs_r_init_hash; @@ -1291,7 +1291,7 @@ SPIDER_TRX *spider_get_trx( trx->trx_direct_hs_r_conn_hash.array.size_of_element); if ( - my_hash_init(&trx->trx_direct_hs_w_conn_hash, spd_charset_utf8_bin, 32, + my_hash_init(&trx->trx_direct_hs_w_conn_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_conn_get_key, 0, 0) ) goto error_direct_hs_w_init_hash; @@ -1304,7 +1304,7 @@ SPIDER_TRX *spider_get_trx( #endif if ( - my_hash_init(&trx->trx_alter_table_hash, spd_charset_utf8_bin, 32, 0, 0, + my_hash_init(&trx->trx_alter_table_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_alter_tbl_get_key, 0, 0) ) goto error_init_alter_hash; @@ -1316,7 +1316,7 @@ SPIDER_TRX *spider_get_trx( trx->trx_alter_table_hash.array.size_of_element); if ( - my_hash_init(&trx->trx_ha_hash, spd_charset_utf8_bin, 32, 0, 0, + my_hash_init(&trx->trx_ha_hash, spd_charset_utf8mb3_bin, 32, 0, 0, (my_hash_get_key) spider_trx_ha_get_key, 0, 0) ) goto error_init_trx_ha_hash; diff --git a/strings/CHARSET_INFO.txt b/strings/CHARSET_INFO.txt index 6f0a810be37..922a372495b 100644 --- a/strings/CHARSET_INFO.txt +++ b/strings/CHARSET_INFO.txt @@ -129,7 +129,7 @@ In all Asian charsets these arrays are set up as follows: In Unicode character sets we have full support of UPPER/LOWER mapping, for sorting order, and for character type detection. -"utf8_general_ci" still has the "old-fashioned" arrays +"utf8mb3_general_ci" still has the "old-fashioned" arrays like to_upper, to_lower, sort_order and ctype, but they are not really used (maybe only in some rare legacy functions). diff --git a/strings/ctype-mb.ic b/strings/ctype-mb.ic index 336c482d24f..6cde31a34ad 100644 --- a/strings/ctype-mb.ic +++ b/strings/ctype-mb.ic @@ -167,7 +167,7 @@ MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused /** Returns well formed length of a string measured in characters (rather than in bytes). - Version for character sets that define CHARLEN(), e.g. utf8. + Version for character sets that define CHARLEN(), e.g. utf8mb3. CHARLEN(cs,b,e) must use the same return code convension that mb_wc() does: - a positive number in the range [1-mbmaxlen] if a valid single-byte or multi-byte character was found diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 312b903ea64..99a0d0f46ae 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -32312,7 +32312,7 @@ static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem) /* Escaped character, e.g. \u1234 */ if ((*beg == '\\') && (beg + 2 < lexem->end) && - (beg[1] == 'u') && my_isxdigit(&my_charset_utf8_general_ci, beg[2])) + (beg[1] == 'u') && my_isxdigit(&my_charset_utf8mb3_general_ci, beg[2])) { int ch; @@ -32341,7 +32341,7 @@ static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem) if (((uchar) *beg) > 0x7F) /* Unescaped multibyte character */ { - CHARSET_INFO *cs= &my_charset_utf8_general_ci; + CHARSET_INFO *cs= &my_charset_utf8mb3_general_ci; my_wc_t wc; int nbytes= cs->cset->mb_wc(cs, &wc, (uchar *) beg, (uchar *) lexem->end); @@ -33720,7 +33720,7 @@ static my_bool my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader) { cs->pad_char= ' '; - cs->ctype= my_charset_utf8_unicode_ci.ctype; + cs->ctype= my_charset_utf8mb3_unicode_ci.ctype; if (!cs->caseinfo) cs->caseinfo= &my_unicase_default; return create_tailoring(cs, loader); @@ -33894,7 +33894,7 @@ struct charset_info_st my_charset_ucs2_unicode_ci= { 128,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_unicode_ci", /* name */ "", /* comment */ "", /* tailoring */ @@ -33926,7 +33926,7 @@ struct charset_info_st my_charset_ucs2_icelandic_uca_ci= { 129,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_icelandic_ci",/* name */ "", /* comment */ icelandic, /* tailoring */ @@ -33958,7 +33958,7 @@ struct charset_info_st my_charset_ucs2_latvian_uca_ci= { 130,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_latvian_ci", /* name */ "", /* comment */ latvian, /* tailoring */ @@ -33990,7 +33990,7 @@ struct charset_info_st my_charset_ucs2_romanian_uca_ci= { 131,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_romanian_ci", /* name */ "", /* comment */ romanian, /* tailoring */ @@ -34022,7 +34022,7 @@ struct charset_info_st my_charset_ucs2_slovenian_uca_ci= { 132,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_slovenian_ci",/* name */ "", /* comment */ slovenian, /* tailoring */ @@ -34054,7 +34054,7 @@ struct charset_info_st my_charset_ucs2_polish_uca_ci= { 133,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_polish_ci", /* name */ "", /* comment */ polish, /* tailoring */ @@ -34086,7 +34086,7 @@ struct charset_info_st my_charset_ucs2_estonian_uca_ci= { 134,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_estonian_ci", /* name */ "", /* comment */ estonian, /* tailoring */ @@ -34118,7 +34118,7 @@ struct charset_info_st my_charset_ucs2_spanish_uca_ci= { 135,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_spanish_ci", /* name */ "", /* comment */ spanish, /* tailoring */ @@ -34150,7 +34150,7 @@ struct charset_info_st my_charset_ucs2_swedish_uca_ci= { 136,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_swedish_ci", /* name */ "", /* comment */ swedish, /* tailoring */ @@ -34182,7 +34182,7 @@ struct charset_info_st my_charset_ucs2_turkish_uca_ci= { 137,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_turkish_ci", /* name */ "", /* comment */ turkish, /* tailoring */ @@ -34214,7 +34214,7 @@ struct charset_info_st my_charset_ucs2_czech_uca_ci= { 138,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_czech_ci", /* name */ "", /* comment */ czech, /* tailoring */ @@ -34247,7 +34247,7 @@ struct charset_info_st my_charset_ucs2_danish_uca_ci= { 139,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_danish_ci", /* name */ "", /* comment */ danish, /* tailoring */ @@ -34279,8 +34279,8 @@ struct charset_info_st my_charset_ucs2_lithuanian_uca_ci= { 140,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ - "ucs2_lithuanian_ci",/* name */ + "ucs2", /* cs name */ + "ucs2_lithuanian_ci",/* name */ "", /* comment */ lithuanian, /* tailoring */ NULL, /* ctype */ @@ -34311,7 +34311,7 @@ struct charset_info_st my_charset_ucs2_slovak_uca_ci= { 141,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_slovak_ci", /* name */ "", /* comment */ slovak, /* tailoring */ @@ -34343,7 +34343,7 @@ struct charset_info_st my_charset_ucs2_spanish2_uca_ci= { 142,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_spanish2_ci", /* name */ "", /* comment */ spanish2, /* tailoring */ @@ -34376,7 +34376,7 @@ struct charset_info_st my_charset_ucs2_roman_uca_ci= { 143,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_roman_ci", /* name */ "", /* comment */ roman, /* tailoring */ @@ -34409,7 +34409,7 @@ struct charset_info_st my_charset_ucs2_persian_uca_ci= { 144,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_persian_ci", /* name */ "", /* comment */ persian, /* tailoring */ @@ -34442,7 +34442,7 @@ struct charset_info_st my_charset_ucs2_esperanto_uca_ci= { 145,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_esperanto_ci",/* name */ "", /* comment */ esperanto, /* tailoring */ @@ -34475,7 +34475,7 @@ struct charset_info_st my_charset_ucs2_hungarian_uca_ci= { 146,0,0, /* number */ MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ + "ucs2", /* cs name */ "ucs2_hungarian_ci",/* name */ "", /* comment */ hungarian, /* tailoring */ @@ -34506,8 +34506,8 @@ struct charset_info_st my_charset_ucs2_hungarian_uca_ci= struct charset_info_st my_charset_ucs2_sinhala_uca_ci= { 147,0,0, /* number */ - MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* csname */ + MY_CS_UCS2_UCA_FLAGS,/* state */ + "ucs2", /* csname */ "ucs2_sinhala_ci", /* name */ "", /* comment */ sinhala, /* tailoring */ @@ -34540,8 +34540,8 @@ struct charset_info_st my_charset_ucs2_sinhala_uca_ci= struct charset_info_st my_charset_ucs2_german2_uca_ci= { 148,0,0, /* number */ - MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* csname */ + MY_CS_UCS2_UCA_FLAGS,/* state */ + "ucs2", /* csname */ "ucs2_german2_ci", /* name */ "", /* comment */ german2, /* tailoring */ @@ -34572,9 +34572,9 @@ struct charset_info_st my_charset_ucs2_german2_uca_ci= struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci= { 149,0,0, /* number */ - MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ - "ucs2_croatian_mysql561_ci", /* name */ + MY_CS_UCS2_UCA_FLAGS,/* state */ + "ucs2", /* cs name */ + "ucs2_croatian_mysql561_ci",/* name */ "", /* comment */ croatian_mysql561, /* tailoring */ NULL, /* ctype */ @@ -34605,9 +34605,9 @@ struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci= struct charset_info_st my_charset_ucs2_croatian_uca_ci= { MY_PAGE2_COLLATION_ID_UCS2,0,0, /* number */ - MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ - "ucs2_croatian_ci", /* name */ + MY_CS_UCS2_UCA_FLAGS,/* state */ + "ucs2", /* cs name */ + "ucs2_croatian_ci", /* name */ "", /* comment */ croatian_mariadb, /* tailoring */ NULL, /* ctype */ @@ -34638,9 +34638,9 @@ struct charset_info_st my_charset_ucs2_croatian_uca_ci= struct charset_info_st my_charset_ucs2_myanmar_uca_ci= { MY_PAGE2_COLLATION_ID_UCS2+1,0,0, /* number */ - MY_CS_UCS2_UCA_FLAGS,/* state */ - "ucs2", /* cs name */ - "ucs2_myanmar_ci", /* name */ + MY_CS_UCS2_UCA_FLAGS,/* state */ + "ucs2", /* cs name */ + "ucs2_myanmar_ci", /* name */ "", /* comment */ myanmar, /* tailoring */ NULL, /* ctype */ @@ -34736,7 +34736,7 @@ struct charset_info_st my_charset_ucs2_unicode_520_ci= struct charset_info_st my_charset_ucs2_vietnamese_ci= { 151,0,0, /* number */ - MY_CS_UCS2_UCA_FLAGS,/* state */ + MY_CS_UCS2_UCA_FLAGS,/* state */ "ucs2", /* csname */ "ucs2_vietnamese_ci",/* name */ "", /* comment */ @@ -34835,7 +34835,7 @@ struct charset_info_st my_charset_ucs2_unicode_520_nopad_ci= #endif -#ifdef HAVE_CHARSET_utf8 +#ifdef HAVE_CHARSET_utf8mb3 static my_bool my_uca_coll_init_utf8mb3(struct charset_info_st *cs, MY_CHARSET_LOADER *loader); @@ -34876,7 +34876,7 @@ my_uca_coll_init_utf8mb3(struct charset_info_st *cs, MY_CHARSET_LOADER *loader) expressions. Note, there is no need to mark byte 255 as a letter, it is illegal byte in UTF8. */ -static uchar ctype_utf8[] = { +static uchar ctype_utf8mb3[] = { 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, @@ -34896,20 +34896,20 @@ static uchar ctype_utf8[] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0 }; -extern MY_CHARSET_HANDLER my_charset_utf8_handler; +extern MY_CHARSET_HANDLER my_charset_utf8mb3_handler; #define MY_CS_UTF8MB3_UCA_FLAGS MY_CS_COMMON_UCA_FLAGS #define MY_CS_UTF8MB3_UCA_NOPAD_FLAGS (MY_CS_UTF8MB3_UCA_FLAGS|MY_CS_NOPAD) -struct charset_info_st my_charset_utf8_unicode_ci= +struct charset_info_st my_charset_utf8mb3_unicode_ci= { 192,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_unicode_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_unicode_ci", /* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -34929,20 +34929,20 @@ struct charset_info_st my_charset_utf8_unicode_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_icelandic_uca_ci= +struct charset_info_st my_charset_utf8mb3_icelandic_uca_ci= { 193,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_icelandic_ci",/* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_icelandic_ci",/* name */ "", /* comment */ icelandic, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -34962,19 +34962,19 @@ struct charset_info_st my_charset_utf8_icelandic_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_latvian_uca_ci= +struct charset_info_st my_charset_utf8mb3_latvian_uca_ci= { 194,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_latvian_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_latvian_ci",/* name */ "", /* comment */ latvian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -34994,19 +34994,19 @@ struct charset_info_st my_charset_utf8_latvian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_romanian_uca_ci= +struct charset_info_st my_charset_utf8mb3_romanian_uca_ci= { 195,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_romanian_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_romanian_ci", /* name */ "", /* comment */ romanian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35026,19 +35026,19 @@ struct charset_info_st my_charset_utf8_romanian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_slovenian_uca_ci= +struct charset_info_st my_charset_utf8mb3_slovenian_uca_ci= { 196,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_slovenian_ci",/* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_slovenian_ci",/* name */ "", /* comment */ slovenian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35058,19 +35058,19 @@ struct charset_info_st my_charset_utf8_slovenian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_polish_uca_ci= +struct charset_info_st my_charset_utf8mb3_polish_uca_ci= { 197,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_polish_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_polish_ci",/* name */ "", /* comment */ polish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35090,19 +35090,19 @@ struct charset_info_st my_charset_utf8_polish_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_estonian_uca_ci= +struct charset_info_st my_charset_utf8mb3_estonian_uca_ci= { 198,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_estonian_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_estonian_ci",/* name */ "", /* comment */ estonian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35122,19 +35122,19 @@ struct charset_info_st my_charset_utf8_estonian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_spanish_uca_ci= +struct charset_info_st my_charset_utf8mb3_spanish_uca_ci= { 199,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_spanish_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_spanish_ci", /* name */ "", /* comment */ spanish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35154,19 +35154,19 @@ struct charset_info_st my_charset_utf8_spanish_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_swedish_uca_ci= +struct charset_info_st my_charset_utf8mb3_swedish_uca_ci= { 200,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_swedish_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_swedish_ci", /* name */ "", /* comment */ swedish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35186,19 +35186,19 @@ struct charset_info_st my_charset_utf8_swedish_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_turkish_uca_ci= +struct charset_info_st my_charset_utf8mb3_turkish_uca_ci= { 201,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_turkish_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_turkish_ci", /* name */ "", /* comment */ turkish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35218,19 +35218,19 @@ struct charset_info_st my_charset_utf8_turkish_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_czech_uca_ci= +struct charset_info_st my_charset_utf8mb3_czech_uca_ci= { 202,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_czech_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_czech_ci", /* name */ "", /* comment */ czech, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35250,20 +35250,20 @@ struct charset_info_st my_charset_utf8_czech_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_danish_uca_ci= +struct charset_info_st my_charset_utf8mb3_danish_uca_ci= { 203,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_danish_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_danish_ci", /* name */ "", /* comment */ danish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35283,19 +35283,19 @@ struct charset_info_st my_charset_utf8_danish_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_lithuanian_uca_ci= +struct charset_info_st my_charset_utf8mb3_lithuanian_uca_ci= { 204,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_lithuanian_ci",/* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_lithuanian_ci",/* name */ "", /* comment */ lithuanian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35315,19 +35315,19 @@ struct charset_info_st my_charset_utf8_lithuanian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_slovak_uca_ci= +struct charset_info_st my_charset_utf8mb3_slovak_uca_ci= { 205,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_slovak_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_slovak_ci",/* name */ "", /* comment */ slovak, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35347,19 +35347,19 @@ struct charset_info_st my_charset_utf8_slovak_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_spanish2_uca_ci= +struct charset_info_st my_charset_utf8mb3_spanish2_uca_ci= { 206,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_spanish2_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_spanish2_ci",/* name */ "", /* comment */ spanish2, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35379,19 +35379,19 @@ struct charset_info_st my_charset_utf8_spanish2_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_roman_uca_ci= +struct charset_info_st my_charset_utf8mb3_roman_uca_ci= { 207,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_roman_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_roman_ci",/* name */ "", /* comment */ roman, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35411,19 +35411,19 @@ struct charset_info_st my_charset_utf8_roman_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_persian_uca_ci= +struct charset_info_st my_charset_utf8mb3_persian_uca_ci= { 208,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_persian_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_persian_ci",/* name */ "", /* comment */ persian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35443,19 +35443,19 @@ struct charset_info_st my_charset_utf8_persian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_esperanto_uca_ci= +struct charset_info_st my_charset_utf8mb3_esperanto_uca_ci= { 209,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_esperanto_ci",/* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_esperanto_ci",/* name */ "", /* comment */ esperanto, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35475,19 +35475,19 @@ struct charset_info_st my_charset_utf8_esperanto_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_hungarian_uca_ci= +struct charset_info_st my_charset_utf8mb3_hungarian_uca_ci= { 210,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_hungarian_ci",/* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_hungarian_ci",/* name */ "", /* comment */ hungarian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35507,19 +35507,19 @@ struct charset_info_st my_charset_utf8_hungarian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_sinhala_uca_ci= +struct charset_info_st my_charset_utf8mb3_sinhala_uca_ci= { 211,0,0, /* number */ - MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ - "utf8", /* cs name */ - "utf8_sinhala_ci", /* name */ + MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_sinhala_ci", /* name */ "", /* comment */ sinhala, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35539,20 +35539,20 @@ struct charset_info_st my_charset_utf8_sinhala_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_german2_uca_ci= +struct charset_info_st my_charset_utf8mb3_german2_uca_ci= { 212,0,0, /* number */ - MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ + MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ MY_UTF8MB3, /* cs name */ - MY_UTF8MB3 "_german2_ci",/* name */ + MY_UTF8MB3 "_german2_ci",/* name */ "", /* comment */ german2, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35572,19 +35572,19 @@ struct charset_info_st my_charset_utf8_german2_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci= +struct charset_info_st my_charset_utf8mb3_croatian_mysql561_uca_ci= { 213,0,0, /* number */ - MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ + MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ MY_UTF8MB3, /* cs name */ MY_UTF8MB3 "_croatian_mysql561_ci",/* name */ "", /* comment */ croatian_mysql561, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35604,20 +35604,20 @@ struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_croatian_uca_ci= +struct charset_info_st my_charset_utf8mb3_croatian_uca_ci= { MY_PAGE2_COLLATION_ID_UTF8,0,0, /* number */ - MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ + MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ MY_UTF8MB3, /* cs name */ MY_UTF8MB3 "_croatian_ci",/* name */ "", /* comment */ croatian_mariadb, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35637,20 +35637,20 @@ struct charset_info_st my_charset_utf8_croatian_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_myanmar_uca_ci= +struct charset_info_st my_charset_utf8mb3_myanmar_uca_ci= { MY_PAGE2_COLLATION_ID_UTF8+1,0,0, /* number */ - MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ + MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ MY_UTF8MB3, /* cs name */ - MY_UTF8MB3 "_myanmar_ci",/* name */ + MY_UTF8MB3 "_myanmar_ci",/* name */ "", /* comment */ myanmar, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35670,12 +35670,12 @@ struct charset_info_st my_charset_utf8_myanmar_uca_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_unicode_520_ci= +struct charset_info_st my_charset_utf8mb3_unicode_520_ci= { 214,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ @@ -35683,7 +35683,7 @@ struct charset_info_st my_charset_utf8_unicode_520_ci= MY_UTF8MB3 "_unicode_520_ci",/* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35703,11 +35703,11 @@ struct charset_info_st my_charset_utf8_unicode_520_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_thai_520_w2= +struct charset_info_st my_charset_utf8mb3_thai_520_w2= { MY_PAGE2_COLLATION_ID_UTF8+2,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ @@ -35715,7 +35715,7 @@ struct charset_info_st my_charset_utf8_thai_520_w2= MY_UTF8MB3 "_thai_520_w2",/* name */ "", /* comment */ "[strength 2]", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35735,11 +35735,11 @@ struct charset_info_st my_charset_utf8_thai_520_w2= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 2, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_multilevel_utf8mb3 }; -struct charset_info_st my_charset_utf8_vietnamese_ci= +struct charset_info_st my_charset_utf8mb3_vietnamese_ci= { 215,0,0, /* number */ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */ @@ -35747,7 +35747,7 @@ struct charset_info_st my_charset_utf8_vietnamese_ci= MY_UTF8MB3 "_vietnamese_ci",/* name */ "", /* comment */ vietnamese, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35767,12 +35767,12 @@ struct charset_info_st my_charset_utf8_vietnamese_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_utf8mb3 }; -struct charset_info_st my_charset_utf8_unicode_nopad_ci= +struct charset_info_st my_charset_utf8mb3_unicode_nopad_ci= { MY_NOPAD_ID(192),0,0, /* number */ MY_CS_UTF8MB3_UCA_NOPAD_FLAGS, /* flags */ @@ -35780,7 +35780,7 @@ struct charset_info_st my_charset_utf8_unicode_nopad_ci= MY_UTF8MB3 "_unicode_nopad_ci",/* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35800,12 +35800,12 @@ struct charset_info_st my_charset_utf8_unicode_nopad_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_nopad_utf8mb3 }; -struct charset_info_st my_charset_utf8_unicode_520_nopad_ci= +struct charset_info_st my_charset_utf8mb3_unicode_520_nopad_ci= { MY_NOPAD_ID(214),0,0, /* number */ MY_CS_UTF8MB3_UCA_NOPAD_FLAGS, /* flags */ @@ -35813,7 +35813,7 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci= MY_UTF8MB3 "_unicode_520_nopad_ci", /* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35833,11 +35833,11 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_uca_collation_handler_nopad_utf8mb3 }; -#endif /* HAVE_CHARSET_utf8 */ +#endif /* HAVE_CHARSET_utf8mb3 */ #ifdef HAVE_CHARSET_utf8mb4 @@ -35883,12 +35883,12 @@ extern MY_CHARSET_HANDLER my_charset_utf8mb4_handler; struct charset_info_st my_charset_utf8mb4_unicode_ci= { 224,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_unicode_ci",/* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_unicode_ci",/* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35916,12 +35916,12 @@ struct charset_info_st my_charset_utf8mb4_unicode_ci= struct charset_info_st my_charset_utf8mb4_icelandic_uca_ci= { 225,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_icelandic_ci",/* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_icelandic_ci",/* name */ "", /* comment */ icelandic, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35953,7 +35953,7 @@ struct charset_info_st my_charset_utf8mb4_latvian_uca_ci= MY_UTF8MB4 "_latvian_ci", /* name */ "", /* comment */ latvian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -35980,12 +35980,12 @@ struct charset_info_st my_charset_utf8mb4_latvian_uca_ci= struct charset_info_st my_charset_utf8mb4_romanian_uca_ci= { 227,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_romanian_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_romanian_ci", /* name */ "", /* comment */ romanian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36012,12 +36012,12 @@ struct charset_info_st my_charset_utf8mb4_romanian_uca_ci= struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci= { 228,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_slovenian_ci",/* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_slovenian_ci",/* name */ "", /* comment */ slovenian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36044,12 +36044,12 @@ struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci= struct charset_info_st my_charset_utf8mb4_polish_uca_ci= { 229,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_polish_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_polish_ci", /* name */ "", /* comment */ polish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36081,7 +36081,7 @@ struct charset_info_st my_charset_utf8mb4_estonian_uca_ci= MY_UTF8MB4 "_estonian_ci", /* name */ "", /* comment */ estonian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36108,12 +36108,12 @@ struct charset_info_st my_charset_utf8mb4_estonian_uca_ci= struct charset_info_st my_charset_utf8mb4_spanish_uca_ci= { 231,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_spanish_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_spanish_ci", /* name */ "", /* comment */ spanish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36140,12 +36140,12 @@ struct charset_info_st my_charset_utf8mb4_spanish_uca_ci= struct charset_info_st my_charset_utf8mb4_swedish_uca_ci= { 232,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_swedish_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_swedish_ci", /* name */ "", /* comment */ swedish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36172,12 +36172,12 @@ struct charset_info_st my_charset_utf8mb4_swedish_uca_ci= struct charset_info_st my_charset_utf8mb4_turkish_uca_ci= { 233,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_turkish_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_turkish_ci", /* name */ "", /* comment */ turkish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36204,12 +36204,12 @@ struct charset_info_st my_charset_utf8mb4_turkish_uca_ci= struct charset_info_st my_charset_utf8mb4_czech_uca_ci= { 234,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_czech_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_czech_ci", /* name */ "", /* comment */ czech, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36237,12 +36237,12 @@ struct charset_info_st my_charset_utf8mb4_czech_uca_ci= struct charset_info_st my_charset_utf8mb4_danish_uca_ci= { 235,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_danish_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_danish_ci", /* name */ "", /* comment */ danish, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36269,12 +36269,12 @@ struct charset_info_st my_charset_utf8mb4_danish_uca_ci= struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci= { 236,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_lithuanian_ci",/* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_lithuanian_ci",/* name */ "", /* comment */ lithuanian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36301,12 +36301,12 @@ struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci= struct charset_info_st my_charset_utf8mb4_slovak_uca_ci= { 237,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_slovak_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_slovak_ci", /* name */ "", /* comment */ slovak, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36333,12 +36333,12 @@ struct charset_info_st my_charset_utf8mb4_slovak_uca_ci= struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci= { 238,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_spanish2_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_spanish2_ci", /* name */ "", /* comment */ spanish2, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36365,12 +36365,12 @@ struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci= struct charset_info_st my_charset_utf8mb4_roman_uca_ci= { 239,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_roman_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_roman_ci", /* name */ "", /* comment */ roman, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36397,12 +36397,12 @@ struct charset_info_st my_charset_utf8mb4_roman_uca_ci= struct charset_info_st my_charset_utf8mb4_persian_uca_ci= { 240,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_persian_ci", /* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_persian_ci", /* name */ "", /* comment */ persian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36429,12 +36429,12 @@ struct charset_info_st my_charset_utf8mb4_persian_uca_ci= struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci= { 241,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_esperanto_ci",/* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_esperanto_ci",/* name */ "", /* comment */ esperanto, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36461,12 +36461,12 @@ struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci= struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci= { 242,0,0, /* number */ - MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_hungarian_ci",/* name */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_hungarian_ci",/* name */ "", /* comment */ hungarian, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36494,11 +36494,11 @@ struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci= { 243,0,0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_sinhala_ci",/* name */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_sinhala_ci",/* name */ "", /* comment */ sinhala, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36526,11 +36526,11 @@ struct charset_info_st my_charset_utf8mb4_german2_uca_ci= { 244,0,0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_german2_ci",/* name */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_german2_ci",/* name */ "", /* comment */ german2, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36562,7 +36562,7 @@ struct charset_info_st my_charset_utf8mb4_croatian_mysql561_uca_ci= MY_UTF8MB4 "_croatian_mysql561_ci",/* name */ "", /* comment */ croatian_mysql561, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36591,11 +36591,11 @@ struct charset_info_st my_charset_utf8mb4_croatian_uca_ci= { MY_PAGE2_COLLATION_ID_UTF8MB4,0,0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ + MY_UTF8MB4, /* csname */ MY_UTF8MB4 "_croatian_ci",/* name */ "", /* comment */ croatian_mariadb, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36624,11 +36624,11 @@ struct charset_info_st my_charset_utf8mb4_myanmar_uca_ci= { MY_PAGE2_COLLATION_ID_UTF8MB4+1,0,0, /* number */ MY_CS_UTF8MB4_UCA_FLAGS,/* state */ - MY_UTF8MB4, /* csname */ - MY_UTF8MB4 "_myanmar_ci",/* name */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_myanmar_ci",/* name */ "", /* comment */ myanmar, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36660,7 +36660,7 @@ struct charset_info_st my_charset_utf8mb4_thai_520_w2= MY_UTF8MB4 "_thai_520_w2", /* name */ "", /* comment */ "[strength 2]", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36692,7 +36692,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_ci= MY_UTF8MB4 "_unicode_520_ci",/* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36725,7 +36725,7 @@ struct charset_info_st my_charset_utf8mb4_vietnamese_ci= MY_UTF8MB4 "_vietnamese_ci",/* name */ "", /* comment */ vietnamese, /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36758,7 +36758,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_nopad_ci= MY_UTF8MB4 "_unicode_nopad_ci", /* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -36791,7 +36791,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci= MY_UTF8MB4 "_unicode_520_nopad_ci", /* name */ "", /* comment */ "", /* tailoring */ - ctype_utf8, /* ctype */ + ctype_utf8mb3, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ @@ -37482,7 +37482,7 @@ struct charset_info_st my_charset_utf32_german2_uca_ci= { 180,0,0, /* number */ MY_CS_UTF32_UCA_FLAGS,/* state */ - "utf32", /* csname */ + "utf32", /* csname */ "utf32_german2_ci", /* name */ "", /* comment */ german2, /* tailoring */ diff --git a/strings/ctype-uca.ic b/strings/ctype-uca.ic index 70c10199e3e..b7108eb7f9d 100644 --- a/strings/ctype-uca.ic +++ b/strings/ctype-uca.ic @@ -432,7 +432,7 @@ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs, This functions is used for one-level and for multi-level collations. We intentionally use only primary level in multi-level collations. This helps to have PARTITION BY KEY put primarily equal records - into the same partition. E.g. in utf8_thai_520_ci records that differ + into the same partition. E.g. in utf8mb3_thai_520_ci records that differ only in tone marks go into the same partition. RETURN diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 01c549a7eaa..bef6d198e22 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -49,11 +49,11 @@ -#ifndef HAVE_CHARSET_utf8 -#define HAVE_CHARSET_utf8 +#ifndef HAVE_CHARSET_utf8mb3 +#define HAVE_CHARSET_utf8mb3 #endif -#ifdef HAVE_CHARSET_utf8 +#ifdef HAVE_CHARSET_utf8mb3 #define HAVE_UNIDATA #endif @@ -70,7 +70,7 @@ #endif -#if defined(HAVE_CHARSET_utf8) || defined(HAVE_CHARSET_utf8mb4) +#if defined(HAVE_CHARSET_utf8mb3) || defined(HAVE_CHARSET_utf8mb4) static inline int my_valid_mbcharlen_utf8mb3(const uchar *s, const uchar *e) @@ -106,7 +106,7 @@ int my_valid_mbcharlen_utf8mb3(const uchar *s, const uchar *e) return 3; } -#endif /*HAVE_CHARSET_utf8 || HAVE_CHARSET_utf8mb4*/ +#endif /*HAVE_CHARSET_utf8mb3 || HAVE_CHARSET_utf8mb4*/ #ifdef HAVE_UNIDATA @@ -1737,7 +1737,7 @@ MY_UNICASE_INFO my_unicase_default= /* - Reproduce old utf8_general_ci behaviour before we fixed Bug#27877. + Reproduce old utf8mb3_general_ci behaviour before we fixed Bug#27877. */ MY_UNICASE_CHARACTER *my_unicase_pages_mysql500[256]={ plane00_mysql500, @@ -4750,7 +4750,7 @@ my_strnxfrmlen_unicode_full_bin(CHARSET_INFO *cs, size_t len) #endif /* HAVE_UNIDATA */ -#ifdef HAVE_CHARSET_utf8 +#ifdef HAVE_CHARSET_utf8mb3 /* We consider bytes with code more than 127 as a letter. @@ -4758,7 +4758,7 @@ my_strnxfrmlen_unicode_full_bin(CHARSET_INFO *cs, size_t len) expressions. Note, there is no need to mark byte 255 as a letter, it is illegal byte in UTF8. */ -static const uchar ctype_utf8[] = { +static const uchar ctype_utf8mb3[] = { 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, @@ -4780,7 +4780,7 @@ static const uchar ctype_utf8[] = { /* The below are taken from usa7 implementation */ -static const uchar to_lower_utf8[] = { +static const uchar to_lower_utf8mb3[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, @@ -4799,7 +4799,7 @@ static const uchar to_lower_utf8[] = { 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 }; -static const uchar to_upper_utf8[] = { +static const uchar to_upper_utf8mb3[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, @@ -4818,8 +4818,8 @@ static const uchar to_upper_utf8[] = { 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 }; -static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)), - my_wc_t * pwc, const uchar *s, const uchar *e) +static int my_utf8mb3_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t * pwc, const uchar *s, const uchar *e) { return my_mb_wc_utf8mb3_quick(pwc, s, e); } @@ -4829,8 +4829,8 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)), The same as above, but without range check for example, for a null-terminated string */ -static int my_utf8_uni_no_range(CHARSET_INFO *cs __attribute__((unused)), - my_wc_t * pwc, const uchar *s) +static int my_utf8mb3_uni_no_range(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t * pwc, const uchar *s) { uchar c; @@ -4865,8 +4865,8 @@ static int my_utf8_uni_no_range(CHARSET_INFO *cs __attribute__((unused)), } -static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)), - my_wc_t wc, uchar *r, uchar *e) +static int my_uni_utf8mb3(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *r, uchar *e) { if (wc < 0x80) { @@ -4901,8 +4901,8 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)), /* The same as above, but without range check. */ -static int my_uni_utf8_no_range(CHARSET_INFO *cs __attribute__((unused)), - my_wc_t wc, uchar *r) +static int my_uni_utf8mb3_no_range(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *r) { int count; @@ -4945,8 +4945,9 @@ my_toupper_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) } -static size_t my_caseup_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, - char *dst, size_t dstlen) +static size_t my_caseup_utf8mb3(CHARSET_INFO *cs, + const char *src, size_t srclen, + char *dst, size_t dstlen) { my_wc_t wc; int srcres, dstres; @@ -4956,10 +4957,10 @@ static size_t my_caseup_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, DBUG_ASSERT(src != dst || cs->caseup_multiply == 1); while ((src < srcend) && - (srcres= my_utf8_uni(cs, &wc, (uchar *) src, (uchar*) srcend)) > 0) + (srcres= my_utf8mb3_uni(cs, &wc, (uchar *) src, (uchar*) srcend)) > 0) { my_toupper_utf8mb3(uni_plane, &wc); - if ((dstres= my_uni_utf8(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) + if ((dstres= my_uni_utf8mb3(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) break; src+= srcres; dst+= dstres; @@ -4968,8 +4969,8 @@ static size_t my_caseup_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, } -static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen, - ulong *nr1, ulong *nr2) +static void my_hash_sort_utf8mb3_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *nr1, ulong *nr2) { my_wc_t wc; int res; @@ -4977,7 +4978,7 @@ static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t sle MY_UNICASE_INFO *uni_plane= cs->caseinfo; register ulong m1= *nr1, m2= *nr2; - while ((s < e) && (res=my_utf8_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 ) + while ((s < e) && (res=my_utf8mb3_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 ) { my_tosort_unicode(uni_plane, &wc, cs->state); MY_HASH_ADD_16(m1, m2, wc); @@ -4988,8 +4989,8 @@ static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t sle } -static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen, - ulong *nr1, ulong *nr2) +static void my_hash_sort_utf8mb3(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *nr1, ulong *nr2) { const uchar *e= s+slen; /* @@ -4998,11 +4999,11 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen, */ while (e > s && e[-1] == ' ') e--; - my_hash_sort_utf8_nopad(cs, s, e - s, nr1, nr2); + my_hash_sort_utf8mb3_nopad(cs, s, e - s, nr1, nr2); } -static size_t my_caseup_str_utf8(CHARSET_INFO *cs, char *src) +static size_t my_caseup_str_utf8mb3(CHARSET_INFO *cs, char *src) { my_wc_t wc; int srcres, dstres; @@ -5011,10 +5012,10 @@ static size_t my_caseup_str_utf8(CHARSET_INFO *cs, char *src) DBUG_ASSERT(cs->caseup_multiply == 1); while (*src && - (srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0) + (srcres= my_utf8mb3_uni_no_range(cs, &wc, (uchar *) src)) > 0) { my_toupper_utf8mb3(uni_plane, &wc); - if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0) + if ((dstres= my_uni_utf8mb3_no_range(cs, wc, (uchar*) dst)) <= 0) break; src+= srcres; dst+= dstres; @@ -5024,8 +5025,9 @@ static size_t my_caseup_str_utf8(CHARSET_INFO *cs, char *src) } -static size_t my_casedn_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, - char *dst, size_t dstlen) +static size_t my_casedn_utf8mb3(CHARSET_INFO *cs, + const char *src, size_t srclen, + char *dst, size_t dstlen) { my_wc_t wc; int srcres, dstres; @@ -5035,10 +5037,10 @@ static size_t my_casedn_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, DBUG_ASSERT(src != dst || cs->casedn_multiply == 1); while ((src < srcend) && - (srcres= my_utf8_uni(cs, &wc, (uchar*) src, (uchar*)srcend)) > 0) + (srcres= my_utf8mb3_uni(cs, &wc, (uchar*) src, (uchar*)srcend)) > 0) { my_tolower_utf8mb3(uni_plane, &wc); - if ((dstres= my_uni_utf8(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) + if ((dstres= my_uni_utf8mb3(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) break; src+= srcres; dst+= dstres; @@ -5047,7 +5049,7 @@ static size_t my_casedn_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, } -static size_t my_casedn_str_utf8(CHARSET_INFO *cs, char *src) +static size_t my_casedn_str_utf8mb3(CHARSET_INFO *cs, char *src) { my_wc_t wc; int srcres, dstres; @@ -5056,10 +5058,10 @@ static size_t my_casedn_str_utf8(CHARSET_INFO *cs, char *src) DBUG_ASSERT(cs->casedn_multiply == 1); while (*src && - (srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0) + (srcres= my_utf8mb3_uni_no_range(cs, &wc, (uchar *) src)) > 0) { my_tolower_utf8mb3(uni_plane, &wc); - if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0) + if ((dstres= my_uni_utf8mb3_no_range(cs, wc, (uchar*) dst)) <= 0) break; src+= srcres; dst+= dstres; @@ -5070,12 +5072,12 @@ static size_t my_casedn_str_utf8(CHARSET_INFO *cs, char *src) the original string, for example: "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE" - (which is 0xC4B0 in utf8, i.e. two bytes) + (which is 0xC4B0 in utf8mb3, i.e. two bytes) is converted into "U+0069 LATIN SMALL LETTER I" - (which is 0x69 in utf8, i.e. one byte) + (which is 0x69 in utf8mb3, i.e. one byte) So, we need to put '\0' terminator after converting. */ @@ -5089,7 +5091,7 @@ static size_t my_casedn_str_utf8(CHARSET_INFO *cs, char *src) Compare 0-terminated UTF8 strings. SYNOPSIS - my_strcasecmp_utf8() + my_strcasecmp_utf8mb3() cs character set handler s First 0-terminated string to compare t Second 0-terminated string to compare @@ -5103,7 +5105,7 @@ static size_t my_casedn_str_utf8(CHARSET_INFO *cs, char *src) */ static -int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) +int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t) { MY_UNICASE_INFO *uni_plane= cs->caseinfo; while (s[0] && t[0]) @@ -5127,19 +5129,19 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) /* Scan a multibyte character. - In the future it is worth to write a special version of my_utf8_uni() + In the future it is worth to write a special version of my_utf8mb3_uni() for 0-terminated strings which will not take in account length. Now - we call the regular version of my_utf8_uni() with s+3 in the + we call the regular version of my_utf8mb3_uni() with s+3 in the last argument. s+3 is enough to scan any multibyte sequence. - Calling the regular version of my_utf8_uni is safe for 0-terminated + Calling the regular version of my_utf8mb3_uni is safe for 0-terminated strings: we will never lose the end of the string: If we have 0 character in the middle of a multibyte sequence, - then my_utf8_uni will always return a negative number, so the + then my_utf8mb3_uni will always return a negative number, so the loop with finish. */ - res= my_utf8_uni(cs,&s_wc, (const uchar*)s, (const uchar*) s + 3); + res= my_utf8mb3_uni(cs,&s_wc, (const uchar*)s, (const uchar*) s + 3); /* In the case of wrong multibyte sequence we will @@ -5164,7 +5166,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) } else { - int res=my_utf8_uni(cs,&t_wc, (const uchar*)t, (const uchar*) t + 3); + int res=my_utf8mb3_uni(cs,&t_wc, (const uchar*)t, (const uchar*) t + 3); if (res <= 0) return strcmp(s, t); t+= res; @@ -5182,10 +5184,10 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) static -int my_wildcmp_utf8(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) +int my_wildcmp_utf8mb3(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) { MY_UNICASE_INFO *uni_plane= cs->caseinfo; return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, @@ -5194,8 +5196,8 @@ int my_wildcmp_utf8(CHARSET_INFO *cs, static -int my_charlen_utf8(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s, const uchar *e) +int my_charlen_utf8mb3(CHARSET_INFO *cs __attribute__((unused)), + const uchar *s, const uchar *e) { uchar c; @@ -5210,23 +5212,23 @@ int my_charlen_utf8(CHARSET_INFO *cs __attribute__((unused)), } -#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8 -#define CHARLEN(cs,str,end) my_charlen_utf8(cs,str,end) +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3 +#define CHARLEN(cs,str,end) my_charlen_utf8mb3(cs,str,end) #define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN #include "ctype-mb.ic" #undef MY_FUNCTION_NAME #undef CHARLEN #undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN -/* my_well_formed_char_length_utf8 */ +/* my_well_formed_char_length_utf8mb3 */ -static inline int my_weight_mb1_utf8_general_ci(uchar b) +static inline int my_weight_mb1_utf8mb3_general_ci(uchar b) { return (int) my_unicase_default_page00[b & 0xFF].sort; } -static inline int my_weight_mb2_utf8_general_ci(uchar b0, uchar b1) +static inline int my_weight_mb2_utf8mb3_general_ci(uchar b0, uchar b1) { my_wc_t wc= UTF8MB2_CODE(b0, b1); MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8]; @@ -5234,7 +5236,7 @@ static inline int my_weight_mb2_utf8_general_ci(uchar b0, uchar b1) } -static inline int my_weight_mb3_utf8_general_ci(uchar b0, uchar b1, uchar b2) +static inline int my_weight_mb3_utf8mb3_general_ci(uchar b0, uchar b1, uchar b2) { my_wc_t wc= UTF8MB3_CODE(b0, b1, b2); MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8]; @@ -5242,7 +5244,7 @@ static inline int my_weight_mb3_utf8_general_ci(uchar b0, uchar b1, uchar b2) } -#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_ci +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_general_ci #define DEFINE_STRNXFRM_UNICODE #define DEFINE_STRNXFRM_UNICODE_NOPAD #define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e) @@ -5251,28 +5253,28 @@ static inline int my_weight_mb3_utf8_general_ci(uchar b0, uchar b1, uchar b2) #define UNICASE_PAGE0 my_unicase_default_page00 #define UNICASE_PAGES my_unicase_default_pages #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) -#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_ci(x) -#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_ci(x,y) -#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_ci(x,y,z) +#define WEIGHT_MB1(x) my_weight_mb1_utf8mb3_general_ci(x) +#define WEIGHT_MB2(x,y) my_weight_mb2_utf8mb3_general_ci(x,y) +#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8mb3_general_ci(x,y,z) #include "strcoll.ic" #define DEFINE_STRNNCOLLSP_NOPAD -#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_nopad_ci +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_general_nopad_ci #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) -#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_ci(x) -#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_ci(x,y) -#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_ci(x,y,z) +#define WEIGHT_MB1(x) my_weight_mb1_utf8mb3_general_ci(x) +#define WEIGHT_MB2(x,y) my_weight_mb2_utf8mb3_general_ci(x,y) +#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8mb3_general_ci(x,y,z) #include "strcoll.ic" -static inline int my_weight_mb1_utf8_general_mysql500_ci(uchar b) +static inline int my_weight_mb1_utf8mb3_general_mysql500_ci(uchar b) { return (int) plane00_mysql500[b & 0xFF].sort; } -static inline int my_weight_mb2_utf8_general_mysql500_ci(uchar b0, uchar b1) +static inline int my_weight_mb2_utf8mb3_general_mysql500_ci(uchar b0, uchar b1) { my_wc_t wc= UTF8MB2_CODE(b0, b1); MY_UNICASE_CHARACTER *page= my_unicase_pages_mysql500[wc >> 8]; @@ -5281,7 +5283,7 @@ static inline int my_weight_mb2_utf8_general_mysql500_ci(uchar b0, uchar b1) static inline int -my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2) +my_weight_mb3_utf8mb3_general_mysql500_ci(uchar b0, uchar b1, uchar b2) { my_wc_t wc= UTF8MB3_CODE(b0, b1, b2); MY_UNICASE_CHARACTER *page= my_unicase_pages_mysql500[wc >> 8]; @@ -5289,7 +5291,7 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2) } -#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_mysql500_ci +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_general_mysql500_ci #define DEFINE_STRNXFRM_UNICODE #define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e) #define OPTIMIZE_ASCII 1 @@ -5297,13 +5299,13 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2) #define UNICASE_PAGE0 plane00_mysql500 #define UNICASE_PAGES my_unicase_pages_mysql500 #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) -#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_mysql500_ci(x) -#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_mysql500_ci(x,y) -#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_mysql500_ci(x,y,z) +#define WEIGHT_MB1(x) my_weight_mb1_utf8mb3_general_mysql500_ci(x) +#define WEIGHT_MB2(x,y) my_weight_mb2_utf8mb3_general_mysql500_ci(x,y) +#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8mb3_general_mysql500_ci(x,y,z) #include "strcoll.ic" -#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_bin +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_bin #define DEFINE_STRNXFRM_UNICODE_BIN2 #define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e) #define OPTIMIZE_ASCII 1 @@ -5315,7 +5317,7 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2) #define DEFINE_STRNNCOLLSP_NOPAD -#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_nopad_bin +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_nopad_bin #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB1(x) ((int) (uchar) (x)) #define WEIGHT_MB2(x,y) ((int) UTF8MB2_CODE(x,y)) @@ -5326,7 +5328,7 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2) TODO-10.2: join this with pad_max_char() in ctype-mb.c */ static void -my_fill_utf8_mb(CHARSET_INFO *cs, char *str, size_t length, int fill) +my_fill_utf8mb3_mb(CHARSET_INFO *cs, char *str, size_t length, int fill) { char *end= str + length; char buf[10]; @@ -5345,53 +5347,53 @@ my_fill_utf8_mb(CHARSET_INFO *cs, char *str, size_t length, int fill) static void -my_fill_utf8(CHARSET_INFO *cs, char *str, size_t length, int fill) +my_fill_utf8mb3(CHARSET_INFO *cs, char *str, size_t length, int fill) { if (fill < 0x80) my_fill_8bit(cs, str, length, fill); else - my_fill_utf8_mb(cs, str, length, fill); + my_fill_utf8mb3_mb(cs, str, length, fill); } -static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler = +static MY_COLLATION_HANDLER my_collation_utf8mb3_general_ci_handler = { NULL, /* init */ - my_strnncoll_utf8_general_ci, - my_strnncollsp_utf8_general_ci, - my_strnxfrm_utf8_general_ci, + my_strnncoll_utf8mb3_general_ci, + my_strnncollsp_utf8mb3_general_ci, + my_strnxfrm_utf8mb3_general_ci, my_strnxfrmlen_unicode, my_like_range_mb, - my_wildcmp_utf8, - my_strcasecmp_utf8, + my_wildcmp_utf8mb3, + my_strcasecmp_utf8mb3, my_instr_mb, - my_hash_sort_utf8, + my_hash_sort_utf8mb3, my_propagate_complex }; -static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler = +static MY_COLLATION_HANDLER my_collation_utf8mb3_general_mysql500_ci_handler = { NULL, /* init */ - my_strnncoll_utf8_general_mysql500_ci, - my_strnncollsp_utf8_general_mysql500_ci, - my_strnxfrm_utf8_general_mysql500_ci, + my_strnncoll_utf8mb3_general_mysql500_ci, + my_strnncollsp_utf8mb3_general_mysql500_ci, + my_strnxfrm_utf8mb3_general_mysql500_ci, my_strnxfrmlen_unicode, my_like_range_mb, - my_wildcmp_utf8, - my_strcasecmp_utf8, + my_wildcmp_utf8mb3, + my_strcasecmp_utf8mb3, my_instr_mb, - my_hash_sort_utf8, + my_hash_sort_utf8mb3, my_propagate_complex }; -static MY_COLLATION_HANDLER my_collation_utf8_bin_handler = +static MY_COLLATION_HANDLER my_collation_utf8mb3_bin_handler = { NULL, /* init */ - my_strnncoll_utf8_bin, - my_strnncollsp_utf8_bin, - my_strnxfrm_utf8_bin, + my_strnncoll_utf8mb3_bin, + my_strnncollsp_utf8mb3_bin, + my_strnxfrm_utf8mb3_bin, my_strnxfrmlen_unicode, my_like_range_mb, my_wildcmp_mb_bin, @@ -5402,28 +5404,28 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler = }; -static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler = +static MY_COLLATION_HANDLER my_collation_utf8mb3_general_nopad_ci_handler = { NULL, /* init */ - my_strnncoll_utf8_general_ci, - my_strnncollsp_utf8_general_nopad_ci, - my_strnxfrm_nopad_utf8_general_ci, + my_strnncoll_utf8mb3_general_ci, + my_strnncollsp_utf8mb3_general_nopad_ci, + my_strnxfrm_nopad_utf8mb3_general_ci, my_strnxfrmlen_unicode, my_like_range_mb, - my_wildcmp_utf8, - my_strcasecmp_utf8, + my_wildcmp_utf8mb3, + my_strcasecmp_utf8mb3, my_instr_mb, - my_hash_sort_utf8_nopad, + my_hash_sort_utf8mb3_nopad, my_propagate_complex }; -static MY_COLLATION_HANDLER my_collation_utf8_nopad_bin_handler = +static MY_COLLATION_HANDLER my_collation_utf8mb3_nopad_bin_handler = { NULL, /* init */ - my_strnncoll_utf8_bin, - my_strnncollsp_utf8_nopad_bin, - my_strnxfrm_nopad_utf8_bin, + my_strnncoll_utf8mb3_bin, + my_strnncollsp_utf8mb3_nopad_bin, + my_strnxfrm_nopad_utf8mb3_bin, my_strnxfrmlen_unicode, my_like_range_mb, my_wildcmp_mb_bin, @@ -5434,24 +5436,24 @@ static MY_COLLATION_HANDLER my_collation_utf8_nopad_bin_handler = }; -MY_CHARSET_HANDLER my_charset_utf8_handler= +MY_CHARSET_HANDLER my_charset_utf8mb3_handler= { NULL, /* init */ my_numchars_mb, my_charpos_mb, my_lengthsp_8bit, my_numcells_mb, - my_utf8_uni, - my_uni_utf8, + my_utf8mb3_uni, + my_uni_utf8mb3, my_mb_ctype_mb, - my_caseup_str_utf8, - my_casedn_str_utf8, - my_caseup_utf8, - my_casedn_utf8, + my_caseup_str_utf8mb3, + my_casedn_str_utf8mb3, + my_caseup_utf8mb3, + my_casedn_utf8mb3, my_snprintf_8bit, my_long10_to_str_8bit, my_longlong10_to_str_8bit, - my_fill_utf8, + my_fill_utf8mb3, my_strntol_8bit, my_strntoul_8bit, my_strntoll_8bit, @@ -5460,26 +5462,26 @@ MY_CHARSET_HANDLER my_charset_utf8_handler= my_strtoll10_8bit, my_strntoull10rnd_8bit, my_scan_8bit, - my_charlen_utf8, - my_well_formed_char_length_utf8, + my_charlen_utf8mb3, + my_well_formed_char_length_utf8mb3, my_copy_fix_mb, - my_uni_utf8, + my_uni_utf8mb3, }; -struct charset_info_st my_charset_utf8_general_ci= +struct charset_info_st my_charset_utf8mb3_general_ci= { 33,0,0, /* number */ MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE, /* state */ - "utf8", /* cs name */ - "utf8_general_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_general_ci", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ - to_upper_utf8, /* sort_order */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ + to_upper_utf8mb3, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -5496,23 +5498,23 @@ struct charset_info_st my_charset_utf8_general_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, - &my_collation_utf8_general_ci_handler + &my_charset_utf8mb3_handler, + &my_collation_utf8mb3_general_ci_handler }; -struct charset_info_st my_charset_utf8_general_mysql500_ci= +struct charset_info_st my_charset_utf8mb3_general_mysql500_ci= { 223,0,0, /* number */ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, /* state */ - "utf8", /* cs name */ - "utf8_general_mysql500_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_general_mysql500_ci", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ - to_upper_utf8, /* sort_order */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ + to_upper_utf8mb3, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -5529,22 +5531,22 @@ struct charset_info_st my_charset_utf8_general_mysql500_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, - &my_collation_utf8_general_mysql500_ci_handler + &my_charset_utf8mb3_handler, + &my_collation_utf8mb3_general_mysql500_ci_handler }; -struct charset_info_st my_charset_utf8_bin= +struct charset_info_st my_charset_utf8mb3_bin= { 83,0,0, /* number */ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_BINSORT|MY_CS_UNICODE, /* state */ - "utf8", /* cs name */ - "utf8_bin", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_bin", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ NULL, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ @@ -5562,23 +5564,23 @@ struct charset_info_st my_charset_utf8_bin= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, - &my_collation_utf8_bin_handler + &my_charset_utf8mb3_handler, + &my_collation_utf8mb3_bin_handler }; -struct charset_info_st my_charset_utf8_general_nopad_ci= +struct charset_info_st my_charset_utf8mb3_general_nopad_ci= { MY_NOPAD_ID(33),0,0, /* number */ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NOPAD, /* state */ - "utf8", /* cs name */ - "utf8_general_nopad_ci", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_general_nopad_ci", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ - to_upper_utf8, /* sort_order */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ + to_upper_utf8mb3, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -5595,22 +5597,22 @@ struct charset_info_st my_charset_utf8_general_nopad_ci= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, - &my_collation_utf8_general_nopad_ci_handler + &my_charset_utf8mb3_handler, + &my_collation_utf8mb3_general_nopad_ci_handler }; -struct charset_info_st my_charset_utf8_nopad_bin= +struct charset_info_st my_charset_utf8mb3_nopad_bin= { MY_NOPAD_ID(83),0,0,/* number */ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NOPAD, - "utf8", /* cs name */ - "utf8_nopad_bin", /* name */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_nopad_bin", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ NULL, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ @@ -5628,8 +5630,8 @@ struct charset_info_st my_charset_utf8_nopad_bin= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, - &my_collation_utf8_nopad_bin_handler + &my_charset_utf8mb3_handler, + &my_collation_utf8mb3_nopad_bin_handler }; @@ -5642,7 +5644,7 @@ struct charset_info_st my_charset_utf8_nopad_bin= * variable to what they actually do. */ -static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, +static int my_strnncoll_utf8mb3_cs(CHARSET_INFO *cs, const uchar *s, size_t slen, const uchar *t, size_t tlen, my_bool t_is_prefix) @@ -5657,8 +5659,8 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, while ( s < se && t < te ) { - s_res=my_utf8_uni(cs,&s_wc, s, se); - t_res=my_utf8_uni(cs,&t_wc, t, te); + s_res=my_utf8mb3_uni(cs,&s_wc, s, se); + t_res=my_utf8mb3_uni(cs,&t_wc, t, te); if ( s_res <= 0 || t_res <= 0 ) @@ -5687,7 +5689,7 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, return t_is_prefix ? t-te : ((diff == 0) ? save_diff : diff); } -static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, +static int my_strnncollsp_utf8mb3_cs(CHARSET_INFO *cs, const uchar *s, size_t slen, const uchar *t, size_t tlen) { @@ -5700,8 +5702,8 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, while ( s < se && t < te ) { - s_res=my_utf8_uni(cs,&s_wc, s, se); - t_res=my_utf8_uni(cs,&t_wc, t, te); + s_res=my_utf8mb3_uni(cs,&s_wc, s, se); + t_res=my_utf8mb3_uni(cs,&t_wc, t, te); if ( s_res <= 0 || t_res <= 0 ) { @@ -5750,30 +5752,30 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, static MY_COLLATION_HANDLER my_collation_cs_handler = { NULL, /* init */ - my_strnncoll_utf8_cs, - my_strnncollsp_utf8_cs, - my_strnxfrm_utf8_general_ci, + my_strnncoll_utf8mb3_cs, + my_strnncollsp_utf8mb3_cs, + my_strnxfrm_utf8mb3_general_ci, my_strnxfrmlen_unicode, my_like_range_simple, my_wildcmp_mb, - my_strcasecmp_utf8, + my_strcasecmp_utf8mb3, my_instr_mb, - my_hash_sort_utf8, + my_hash_sort_utf8mb3, my_propagate_simple }; -struct charset_info_st my_charset_utf8_general_cs= +struct charset_info_st my_charset_utf8mb3_general_cs= { 254,0,0, /* number */ - MY_CS_COMPILED|MY_CS_UNICODE, /* state */ - "utf8", /* cs name */ - "utf8_general_cs", /* name */ + MY_CS_COMPILED|MY_CS_UNICODE, /* state */ + MY_UTF8MB3, /* cs name */ + MY_UTF8MB3 "_general_cs", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ - to_upper_utf8, /* sort_order */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ + to_upper_utf8mb3, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -5790,7 +5792,7 @@ struct charset_info_st my_charset_utf8_general_cs= ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ - &my_charset_utf8_handler, + &my_charset_utf8mb3_handler, &my_collation_cs_handler }; #endif /* Cybozu Hack */ @@ -7048,9 +7050,9 @@ my_charlen_filename(CHARSET_INFO *cs, const uchar *str, const uchar *end) /* #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) -#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_ci(x) -#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_ci(x,y) -#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_ci(x,y,z) +#define WEIGHT_MB1(x) my_weight_mb1_utf8mb3_general_ci(x) +#define WEIGHT_MB2(x,y) my_weight_mb2_utf8mb3_general_ci(x,y) +#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8mb3_general_ci(x,y,z) */ #include "strcoll.ic" @@ -7063,10 +7065,10 @@ static MY_COLLATION_HANDLER my_collation_filename_handler = my_strnxfrm_filename, my_strnxfrmlen_unicode, my_like_range_mb, - my_wildcmp_utf8, - my_strcasecmp_utf8, + my_wildcmp_utf8mb3, + my_strcasecmp_utf8mb3, my_instr_mb, - my_hash_sort_utf8, + my_hash_sort_utf8mb3, my_propagate_complex }; @@ -7080,10 +7082,10 @@ static MY_CHARSET_HANDLER my_charset_filename_handler= my_mb_wc_filename, my_wc_mb_filename, my_mb_ctype_mb, - my_caseup_str_utf8, - my_casedn_str_utf8, - my_caseup_utf8, - my_casedn_utf8, + my_caseup_str_utf8mb3, + my_casedn_str_utf8mb3, + my_caseup_utf8mb3, + my_casedn_utf8mb3, my_snprintf_8bit, my_long10_to_str_8bit, my_longlong10_to_str_8bit, @@ -7112,10 +7114,10 @@ struct charset_info_st my_charset_filename= "filename", /* name */ "", /* comment */ NULL, /* tailoring */ - ctype_utf8, /* ctype */ - to_lower_utf8, /* to_lower */ - to_upper_utf8, /* to_upper */ - to_upper_utf8, /* sort_order */ + ctype_utf8mb3, /* ctype */ + to_lower_utf8mb3, /* to_lower */ + to_upper_utf8mb3, /* to_upper */ + to_upper_utf8mb3, /* sort_order */ NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -7506,12 +7508,12 @@ my_casedn_str_utf8mb4(CHARSET_INFO *cs, char *src) the original string, for example: "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE" - (which is 0xC4B0 in utf8, i.e. two bytes) + (which is 0xC4B0 in utf8mb3, i.e. two bytes) is converted into "U+0069 LATIN SMALL LETTER I" - (which is 0x69 in utf8, i.e. one byte) + (which is 0x69 in utf8mb3, i.e. one byte) So, we need to put '\0' terminator after converting. */ @@ -7653,9 +7655,9 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), #define UNICASE_PAGES my_unicase_default_pages #define IS_MB4_CHAR(b0,b1,b2,b3) IS_UTF8MB4_STEP3(b0,b1,b2,b3) #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) -#define WEIGHT_MB1(b0) my_weight_mb1_utf8_general_ci(b0) -#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf8_general_ci(b0,b1) -#define WEIGHT_MB3(b0,b1,b2) my_weight_mb3_utf8_general_ci(b0,b1,b2) +#define WEIGHT_MB1(b0) my_weight_mb1_utf8mb3_general_ci(b0) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf8mb3_general_ci(b0,b1) +#define WEIGHT_MB3(b0,b1,b2) my_weight_mb3_utf8mb3_general_ci(b0,b1,b2) /* All non-BMP characters have the same weight. */ @@ -7676,9 +7678,9 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), #define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4_general_nopad_ci #define IS_MB4_CHAR(b0,b1,b2,b3) IS_UTF8MB4_STEP3(b0,b1,b2,b3) #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) -#define WEIGHT_MB1(b0) my_weight_mb1_utf8_general_ci(b0) -#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf8_general_ci(b0,b1) -#define WEIGHT_MB3(b0,b1,b2) my_weight_mb3_utf8_general_ci(b0,b1,b2) +#define WEIGHT_MB1(b0) my_weight_mb1_utf8mb3_general_ci(b0) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf8mb3_general_ci(b0,b1) +#define WEIGHT_MB3(b0,b1,b2) my_weight_mb3_utf8mb3_general_ci(b0,b1,b2) /* All non-BMP characters have the same weight. */ @@ -7777,7 +7779,7 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler= my_snprintf_8bit, my_long10_to_str_8bit, my_longlong10_to_str_8bit, - my_fill_utf8, + my_fill_utf8mb3, my_strntol_8bit, my_strntoul_8bit, my_strntoll_8bit, diff --git a/strings/ctype.c b/strings/ctype.c index 32c41e6e9e7..9a89c6fe41d 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -413,7 +413,7 @@ tailoring_append2(MY_XML_PARSER *st, static size_t scan_one_character(const char *s, const char *e, my_wc_t *wc) { - CHARSET_INFO *cs= &my_charset_utf8_general_ci; + CHARSET_INFO *cs= &my_charset_utf8mb3_general_ci; if (s >= e) return 0; diff --git a/strings/strcoll.ic b/strings/strcoll.ic index 50278135dd4..e7d614ebdf5 100644 --- a/strings/strcoll.ic +++ b/strings/strcoll.ic @@ -105,7 +105,7 @@ MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end) #ifdef IS_MB1_MBHEAD_UNUSED_GAP /* Quickly filter out unused bytes that are neither MB1 nor MBHEAD. - E.g. [0x80..0xC1] in utf8. This allows using simplified conditions + E.g. [0x80..0xC1] in utf8mb(3|4). This allows using simplified conditions in IS_MB2_CHAR(), IS_MB3_CHAR(), etc. */ if (IS_MB1_MBHEAD_UNUSED_GAP(*str)) @@ -158,7 +158,7 @@ bad: Note, cs->coll->strnncoll() is usually used to compare identifiers. Perhaps we should eventually (in 10.2?) create a new collation - my_charset_utf8_general_ci_no_pad and have only one comparison function + my_charset_utf8mb3_general_ci_no_pad and have only one comparison function in MY_COLLATION_HANDLER. @param cs - the character set and collation @@ -339,7 +339,7 @@ MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs, Store sorting weights using 2 bytes per character. This function is shared between - - utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin + - utf8mb3_general_ci, utf8mb3_bin, ucs2_general_ci, ucs2_bin which support BMP only (U+0000..U+FFFF). - utf8mb4_general_ci, utf16_general_ci, utf32_general_ci, which map all supplementary characters to weight 0xFFFD. @@ -473,7 +473,7 @@ MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs, Store sorting weights using 2 bytes per character. These functions are shared between - - utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin + - utf8mb3_general_ci, utf8mb3_bin, ucs2_general_ci, ucs2_bin which support BMP only (U+0000..U+FFFF). - utf8mb4_general_ci, utf16_general_ci, utf32_general_ci, which map all supplementary characters to weight 0xFFFD. diff --git a/unittest/json_lib/json_lib-t.c b/unittest/json_lib/json_lib-t.c index 11f02b204f8..378ebe201f5 100644 --- a/unittest/json_lib/json_lib-t.c +++ b/unittest/json_lib/json_lib-t.c @@ -173,7 +173,7 @@ test_search() int main() { - ci= &my_charset_utf8_general_ci; + ci= &my_charset_utf8mb3_general_ci; plan(6); diag("Testing json_lib functions."); diff --git a/unittest/mysys/ma_dyncol-t.c b/unittest/mysys/ma_dyncol-t.c index 124f16e15be..d76f1b49f8f 100644 --- a/unittest/mysys/ma_dyncol-t.c +++ b/unittest/mysys/ma_dyncol-t.c @@ -197,12 +197,12 @@ static CHARSET_INFO *charset_list[]= &my_charset_ujis_japanese_ci, &my_charset_ujis_bin, #endif -#ifdef HAVE_CHARSET_utf8 - &my_charset_utf8_general_ci, +#ifdef HAVE_CHARSET_utf8mb3 + &my_charset_utf8mb3_general_ci, #ifdef HAVE_UCA_COLLATIONS - &my_charset_utf8_unicode_ci, + &my_charset_utf8mb3_unicode_ci, #endif - &my_charset_utf8_bin, + &my_charset_utf8mb3_bin, #endif }; diff --git a/unittest/sql/explain_filename-t.cc b/unittest/sql/explain_filename-t.cc index 859cb0cdaa4..32291d5e621 100644 --- a/unittest/sql/explain_filename-t.cc +++ b/unittest/sql/explain_filename-t.cc @@ -31,7 +31,7 @@ static const char **error_messages[1]= { error_messages_txt }; int setup() { - system_charset_info = &my_charset_utf8_bin; + system_charset_info = &my_charset_utf8mb3_bin; my_default_lc_messages = &my_locale_en_US; /* Populate the necessary error messages */ diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c index 00d49971595..fb526477234 100644 --- a/unittest/strings/strings-t.c +++ b/unittest/strings/strings-t.c @@ -85,12 +85,12 @@ static CHARSET_INFO *charset_list[]= &my_charset_ujis_japanese_ci, &my_charset_ujis_bin, #endif -#ifdef HAVE_CHARSET_utf8 - &my_charset_utf8_general_ci, +#ifdef HAVE_CHARSET_utf8mb3 + &my_charset_utf8mb3_general_ci, #ifdef HAVE_UCA_COLLATIONS - &my_charset_utf8_unicode_ci, + &my_charset_utf8mb3_unicode_ci, #endif - &my_charset_utf8_bin, + &my_charset_utf8mb3_bin, #endif }; @@ -743,9 +743,9 @@ test_strcollsp() failed+= strcollsp(&my_charset_utf32_bin, strcoll_utf32_common); #endif #ifdef HAVE_CHARSET_utf8 - failed+= strcollsp(&my_charset_utf8_general_ci, strcoll_utf8mb3_common); - failed+= strcollsp(&my_charset_utf8_general_mysql500_ci, strcoll_utf8mb3_common); - failed+= strcollsp(&my_charset_utf8_bin, strcoll_utf8mb3_common); + failed+= strcollsp(&my_charset_utf8mb3_general_ci, strcoll_utf8mb3_common); + failed+= strcollsp(&my_charset_utf8mb3_general_mysql500_ci, strcoll_utf8mb3_common); + failed+= strcollsp(&my_charset_utf8mb3_bin, strcoll_utf8mb3_common); #endif #ifdef HAVE_CHARSET_utf8mb4 failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb3_common); |