summary | refs | log | tree | commit | diff
path: root/strings
diff options
context:
space:
mode:
author Alexander Barkov <bar@mysql.com> 2010-02-24 13:52:59 +0400
committer Alexander Barkov <bar@mysql.com> 2010-02-24 13:52:59 +0400
commit c803a7c460e7317b421b9ca15e5caff350245773 (patch)
tree 3415439db845a6474471e4a6b8b460e3256965e7 /strings
parent 4836199d701af05a9c89563b642c51ee5640e804 (diff)
parent 8994fad85db18b4ab31fc67e2f8e15f1203d0b1a (diff)
download mariadb-git-c803a7c460e7317b421b9ca15e5caff350245773.tar.gz
Merging WL#1213 into mysql-next-mr-bar2
Diffstat (limited to 'strings')
-rw-r--r-- strings/ctype-mb.c 38
-rw-r--r-- strings/ctype-uca.c 1986
-rw-r--r-- strings/ctype-ucs2.c 3100
-rw-r--r-- strings/ctype-utf8.c 1054
4 files changed, 5417 insertions, 761 deletions
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 252c5a08b8c..98b598c3c2c 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -467,10 +467,11 @@ uint my_instr_mb(CHARSET_INFO *cs,
/* BINARY collations handlers for MB charsets */
-static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
+int
+my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool t_is_prefix)
{
size_t len=min(slen,tlen);
int cmp= memcmp(s,t,len);
@@ -503,10 +504,11 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
0 if strings are equal
*/
-static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
- const uchar *a, size_t a_length,
- const uchar *b, size_t b_length,
- my_bool diff_if_only_endspace_difference)
+int
+my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
+ const uchar *a, size_t a_length,
+ const uchar *b, size_t b_length,
+ my_bool diff_if_only_endspace_difference)
{
const uchar *end;
size_t length;
@@ -562,14 +564,17 @@ static size_t my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
}
-static int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
- const char *s, const char *t)
+int
+my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
+ const char *s, const char *t)
{
return strcmp(s,t);
}
-static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
- const uchar *key, size_t len,ulong *nr1, ulong *nr2)
+
+void
+my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *key, size_t len,ulong *nr1, ulong *nr2)
{
const uchar *pos = key;
@@ -787,10 +792,11 @@ fill_max_and_min:
}
-static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
- const char *str,const char *str_end,
- const char *wildstr,const char *wildend,
- int escape, int w_one, int w_many)
+int
+my_wildcmp_mb_bin(CHARSET_INFO *cs,
+ const char *str,const char *str_end,
+ const char *wildstr,const char *wildend,
+ int escape, int w_one, int w_many)
{
int result= -1; /* Not found, using wildcards */
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 56f3ddccae4..7dbec5a1321 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -9464,4 +9464,1990 @@ CHARSET_INFO my_charset_utf8_sinhala_uca_ci=
#endif /* HAVE_CHARSET_utf8 */
+
+#ifdef HAVE_CHARSET_utf8mb4
+
+extern MY_CHARSET_HANDLER my_charset_utf8mb4_handler;
+
+#define MY_CS_UTF8MB4_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_UNICODE_SUPPLEMENT)
+
+CHARSET_INFO my_charset_utf8mb4_unicode_ci=
+{
+ 224,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_unicode_ci",/* name */
+ "", /* comment */
+ "", /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ uca_length, /* sort_order */
+ NULL, /* contractions */
+ uca_weight, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+
+CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci=
+{
+ 225,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_icelandic_ci",/* name */
+ "", /* comment */
+ icelandic, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci=
+{
+ 226,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_latvian_ci", /* name */
+ "", /* comment */
+ latvian, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci=
+{
+ 227,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_romanian_ci", /* name */
+ "", /* comment */
+ romanian, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci=
+{
+ 228,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_slovenian_ci",/* name */
+ "", /* comment */
+ slovenian, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_polish_uca_ci=
+{
+ 229,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_polish_ci", /* name */
+ "", /* comment */
+ polish, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci=
+{
+ 230,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_estonian_ci", /* name */
+ "", /* comment */
+ estonian, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci=
+{
+ 231,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_spanish_ci", /* name */
+ "", /* comment */
+ spanish, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci=
+{
+ 232,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_swedish_ci", /* name */
+ "", /* comment */
+ swedish, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci=
+{
+ 233,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_turkish_ci", /* name */
+ "", /* comment */
+ turkish, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_turkish, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 2, /* caseup_multiply */
+ 2, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_czech_uca_ci=
+{
+ 234,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_czech_ci", /* name */
+ "", /* comment */
+ czech, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+
+CHARSET_INFO my_charset_utf8mb4_danish_uca_ci=
+{
+ 235,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_danish_ci", /* name */
+ "", /* comment */
+ danish, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci=
+{
+ 236,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_lithuanian_ci",/* name */
+ "", /* comment */
+ lithuanian, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci=
+{
+ 237,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_slovak_ci", /* name */
+ "", /* comment */
+ slovak, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci=
+{
+ 238,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_spanish2_ci", /* name */
+ "", /* comment */
+ spanish2, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_roman_uca_ci=
+{
+ 239,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_roman_ci", /* name */
+ "", /* comment */
+ roman, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_persian_uca_ci=
+{
+ 240,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_persian_ci", /* name */
+ "", /* comment */
+ persian, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci=
+{
+ 241,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_esperanto_ci",/* name */
+ "", /* comment */
+ esperanto, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci=
+{
+ 242,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_hungarian_ci",/* name */
+ "", /* comment */
+ hungarian, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci=
+{
+ 243,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_sinhala_ci",/* name */
+ "", /* comment */
+ sinhala, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+#endif /* HAVE_CHARSET_utf8mb4 */
+
+
+#ifdef HAVE_CHARSET_utf32
+
+MY_COLLATION_HANDLER my_collation_utf32_uca_handler =
+{
+ my_coll_init_uca, /* init */
+ my_strnncoll_any_uca,
+ my_strnncollsp_any_uca,
+ my_strnxfrm_any_uca,
+ my_strnxfrmlen_simple,
+ my_like_range_utf32,
+ my_wildcmp_uca,
+ NULL,
+ my_instr_mb,
+ my_hash_sort_any_uca,
+ my_propagate_complex
+};
+
+extern MY_CHARSET_HANDLER my_charset_utf32_handler;
+
+#define MY_CS_UTF32_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII)
+
+CHARSET_INFO my_charset_utf32_unicode_ci=
+{
+ 160,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_unicode_ci", /* name */
+ "", /* comment */
+ "", /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ uca_length, /* sort_order */
+ NULL, /* contractions */
+ uca_weight, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+
+CHARSET_INFO my_charset_utf32_icelandic_uca_ci=
+{
+ 161,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_icelandic_ci",/* name */
+ "", /* comment */
+ icelandic, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+CHARSET_INFO my_charset_utf32_latvian_uca_ci=
+{
+ 162,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_latvian_ci", /* name */
+ "", /* comment */
+ latvian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+CHARSET_INFO my_charset_utf32_romanian_uca_ci=
+{
+ 163,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_romanian_ci", /* name */
+ "", /* comment */
+ romanian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+CHARSET_INFO my_charset_utf32_slovenian_uca_ci=
+{
+ 164,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_slovenian_ci",/* name */
+ "", /* comment */
+ slovenian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+CHARSET_INFO my_charset_utf32_polish_uca_ci=
+{
+ 165,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_polish_ci", /* name */
+ "", /* comment */
+ polish, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+CHARSET_INFO my_charset_utf32_estonian_uca_ci=
+{
+ 166,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_estonian_ci", /* name */
+ "", /* comment */
+ estonian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+CHARSET_INFO my_charset_utf32_spanish_uca_ci=
+{
+ 167,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_spanish_ci", /* name */
+ "", /* comment */
+ spanish, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+CHARSET_INFO my_charset_utf32_swedish_uca_ci=
+{
+ 168,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_swedish_ci", /* name */
+ "", /* comment */
+ swedish, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+CHARSET_INFO my_charset_utf32_turkish_uca_ci=
+{
+ 169,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_turkish_ci", /* name */
+ "", /* comment */
+ turkish, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_turkish, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+CHARSET_INFO my_charset_utf32_czech_uca_ci=
+{
+ 170,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_czech_ci", /* name */
+ "", /* comment */
+ czech, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+
+CHARSET_INFO my_charset_utf32_danish_uca_ci=
+{
+ 171,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_danish_ci", /* name */
+ "", /* comment */
+ danish, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+CHARSET_INFO my_charset_utf32_lithuanian_uca_ci=
+{
+ 172,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_lithuanian_ci",/* name */
+ "", /* comment */
+ lithuanian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+CHARSET_INFO my_charset_utf32_slovak_uca_ci=
+{
+ 173,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_slovak_ci", /* name */
+ "", /* comment */
+ slovak, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+CHARSET_INFO my_charset_utf32_spanish2_uca_ci= /* utf32 with the "spanish2" UCA tailoring */
+{
+ 174,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_spanish2_ci", /* name */
+ "", /* comment */
+ spanish2, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler, /* character set handler */
+ &my_collation_utf32_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf32_roman_uca_ci= /* utf32 with the "roman" UCA tailoring */
+{
+ 175,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_roman_ci", /* name */
+ "", /* comment */
+ roman, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler, /* character set handler */
+ &my_collation_utf32_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf32_persian_uca_ci= /* utf32 with the "persian" UCA tailoring */
+{
+ 176,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_persian_ci", /* name */
+ "", /* comment */
+ persian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler, /* character set handler */
+ &my_collation_utf32_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf32_esperanto_uca_ci= /* utf32 with the "esperanto" UCA tailoring */
+{
+ 177,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_esperanto_ci",/* name */
+ "", /* comment */
+ esperanto, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler, /* character set handler */
+ &my_collation_utf32_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf32_hungarian_uca_ci= /* utf32 with the "hungarian" UCA tailoring */
+{
+ 178,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_hungarian_ci",/* name */
+ "", /* comment */
+ hungarian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler, /* character set handler */
+ &my_collation_utf32_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf32_sinhala_uca_ci= /* utf32 with the "sinhala" UCA tailoring */
+{
+ 179,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_sinhala_ci", /* name */
+ "", /* comment */
+ sinhala, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler, /* character set handler */
+ &my_collation_utf32_uca_handler /* collation handler */
+};
+
+#endif /* HAVE_CHARSET_utf32 */
+
+
+#ifdef HAVE_CHARSET_utf16
+
+
+MY_COLLATION_HANDLER my_collation_utf16_uca_handler = /* UCA collation callbacks for utf16; slot labels below are inferred from the function names - confirm against MY_COLLATION_HANDLER */
+{
+ my_coll_init_uca, /* init */
+ my_strnncoll_any_uca, /* strnncoll */
+ my_strnncollsp_any_uca, /* strnncollsp */
+ my_strnxfrm_any_uca, /* strnxfrm */
+ my_strnxfrmlen_simple, /* strnxfrmlen */
+ my_like_range_utf16, /* like_range; the only entry with a utf16-specific name */
+ my_wildcmp_uca, /* wildcmp */
+ NULL, /* NOTE(review): slot left unset - presumably strcasecmp; confirm */
+ my_instr_mb, /* instr */
+ my_hash_sort_any_uca, /* hash_sort */
+ my_propagate_complex /* propagate */
+};
+
+extern MY_CHARSET_HANDLER my_charset_utf16_handler;
+
+#define MY_CS_UTF16_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII)
+
+CHARSET_INFO my_charset_utf16_unicode_ci= /* utf16 with an empty tailoring; uses the uca_length/uca_weight tables directly */
+{
+ 101,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_unicode_ci", /* name */
+ "", /* comment */
+ "", /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ uca_length, /* sort_order */
+ NULL, /* contractions */
+ uca_weight, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+
+CHARSET_INFO my_charset_utf16_icelandic_uca_ci= /* utf16 with the "icelandic" UCA tailoring */
+{
+ 102,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_icelandic_ci",/* name */
+ "", /* comment */
+ icelandic, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_latvian_uca_ci= /* utf16 with the "latvian" UCA tailoring */
+{
+ 103,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_latvian_ci", /* name */
+ "", /* comment */
+ latvian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_romanian_uca_ci= /* utf16 with the "romanian" UCA tailoring */
+{
+ 104,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_romanian_ci", /* name */
+ "", /* comment */
+ romanian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_slovenian_uca_ci= /* utf16 with the "slovenian" UCA tailoring */
+{
+ 105,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_slovenian_ci",/* name */
+ "", /* comment */
+ slovenian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_polish_uca_ci= /* utf16 with the "polish" UCA tailoring */
+{
+ 106,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_polish_ci", /* name */
+ "", /* comment */
+ polish, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_estonian_uca_ci= /* utf16 with the "estonian" UCA tailoring */
+{
+ 107,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_estonian_ci", /* name */
+ "", /* comment */
+ estonian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_spanish_uca_ci= /* utf16 with the "spanish" UCA tailoring */
+{
+ 108,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_spanish_ci", /* name */
+ "", /* comment */
+ spanish, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_swedish_uca_ci= /* utf16 with the "swedish" UCA tailoring */
+{
+ 109,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_swedish_ci", /* name */
+ "", /* comment */
+ swedish, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_turkish_uca_ci= /* utf16 with the "turkish" UCA tailoring */
+{
+ 110,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_turkish_ci", /* name */
+ "", /* comment */
+ turkish, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_turkish, /* caseinfo: Turkish table; the sibling collations use my_unicase_default */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_czech_uca_ci= /* utf16 with the "czech" UCA tailoring */
+{
+ 111,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_czech_ci", /* name */
+ "", /* comment */
+ czech, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+
+CHARSET_INFO my_charset_utf16_danish_uca_ci= /* utf16 with the "danish" UCA tailoring */
+{
+ 112,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_danish_ci", /* name */
+ "", /* comment */
+ danish, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_lithuanian_uca_ci= /* utf16 with the "lithuanian" UCA tailoring */
+{
+ 113,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_lithuanian_ci",/* name */
+ "", /* comment */
+ lithuanian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_slovak_uca_ci= /* utf16 with the "slovak" UCA tailoring */
+{
+ 114,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_slovak_ci", /* name */
+ "", /* comment */
+ slovak, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_spanish2_uca_ci= /* utf16 with the "spanish2" UCA tailoring */
+{
+ 115,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_spanish2_ci",/* name */
+ "", /* comment */
+ spanish2, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_roman_uca_ci= /* utf16 with the "roman" UCA tailoring */
+{
+ 116,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_roman_ci", /* name */
+ "", /* comment */
+ roman, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_persian_uca_ci= /* utf16 with the "persian" UCA tailoring */
+{
+ 117,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_persian_ci", /* name */
+ "", /* comment */
+ persian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_esperanto_uca_ci= /* utf16 with the "esperanto" UCA tailoring */
+{
+ 118,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_esperanto_ci",/* name */
+ "", /* comment */
+ esperanto, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_hungarian_uca_ci= /* utf16 with the "hungarian" UCA tailoring */
+{
+ 119,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_hungarian_ci",/* name */
+ "", /* comment */
+ hungarian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default,/* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+CHARSET_INFO my_charset_utf16_sinhala_uca_ci= /* utf16 with the "sinhala" UCA tailoring */
+{
+ 120,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* csname */
+ "utf16_sinhala_ci",/* name */
+ "", /* comment */
+ sinhala, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default,/* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler, /* character set handler */
+ &my_collation_utf16_uca_handler /* collation handler */
+};
+
+#endif /* HAVE_CHARSET_utf16 */
+
+
+
#endif /* HAVE_UCA_COLLATIONS */
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 069131ba354..6de0ea8f7e8 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -25,479 +25,81 @@
#include <stdarg.h>
-#ifdef HAVE_CHARSET_ucs2
-
-#ifndef EILSEQ
-#define EILSEQ ENOENT
+#if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2)
+#define HAVE_CHARSET_mb2
#endif
-static uchar ctype_ucs2[] = {
- 0,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
- 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
- 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-static uchar to_lower_ucs2[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
- 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
- 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
- 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
- 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
- 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
- 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
- 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
- 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
- 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
- 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
- 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
- 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
-};
+#if defined(HAVE_CHARSET_mb2) || defined(HAVE_CHARSET_utf32)
+#define HAVE_CHARSET_mb2_or_mb4
+#endif
-static uchar to_upper_ucs2[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
- 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
- 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
- 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
- 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
- 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
- 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
- 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
- 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
- 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
- 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
- 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
- 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
-};
+#ifndef EILSEQ
+#define EILSEQ ENOENT
+#endif
-static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
- my_wc_t * pwc, const uchar *s, const uchar *e)
-{
- if (s+2 > e) /* Need 2 characters */
- return MY_CS_TOOSMALL2;
-
- *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]);
- return 2;
-}
+#undef ULONGLONG_MAX
+#define ULONGLONG_MAX (~(ulonglong) 0)
+#define MAX_NEGATIVE_NUMBER ((ulonglong) LL(0x8000000000000000))
+#define INIT_CNT 9
+#define LFACTOR ULL(1000000000)
+#define LFACTOR1 ULL(10000000000)
+#define LFACTOR2 ULL(100000000000)
-static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
- my_wc_t wc, uchar *r, uchar *e)
-{
- if ( r+2 > e )
- return MY_CS_TOOSMALL2;
-
- r[0]= (uchar) (wc >> 8);
- r[1]= (uchar) (wc & 0xFF);
- return 2;
-}
+static unsigned long lfactor[9]=
+{ 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L };
+#define REPLACEMENT_CHAR 0xFFFD /* U+FFFD REPLACEMENT CHARACTER; no trailing ';' so the macro also works inside expressions */
-static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
- char *dst __attribute__((unused)),
- size_t dstlen __attribute__((unused)))
-{
- my_wc_t wc;
- int res;
- char *srcend= src + srclen;
- MY_UNICASE_INFO **uni_plane= cs->caseinfo;
- DBUG_ASSERT(src == dst && srclen == dstlen);
-
- while ((src < srcend) &&
- (res= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
- {
- int plane= (wc>>8) & 0xFF;
- wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
- if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
- break;
- src+= res;
- }
- return srclen;
-}
-static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
- ulong *n1, ulong *n2)
+#ifdef HAVE_CHARSET_mb2_or_mb4
+static inline int /* byte-wise comparison of two ranges; <0, 0 or >0 */
+my_bincmp(const uchar *s, const uchar *se, /* first range: [s, se) */
+ const uchar *t, const uchar *te) /* second range: [t, te) */
{
- my_wc_t wc;
- int res;
- const uchar *e=s+slen;
- MY_UNICASE_INFO **uni_plane= cs->caseinfo;
-
- while (e > s+1 && e[-1] == ' ' && e[-2] == '\0')
- e-= 2;
-
- while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
- {
- int plane = (wc>>8) & 0xFF;
- wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
- n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8);
- n2[0]+=3;
- n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
- n2[0]+=3;
- s+=res;
- }
+ int slen= (int) (se - s), tlen= (int) (te - t); /* range lengths in bytes, narrowed to int */
+ int len= min(slen, tlen); /* only the common prefix is compared */
+ int cmp= memcmp(s, t, len);
+ return cmp ? cmp : slen - tlen; /* equal prefixes: the length difference decides */
}
-static size_t my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
- char * s __attribute__((unused)))
+static size_t /* stub: ignores both arguments and returns 0 */
+my_caseup_str_mb2_or_mb4(CHARSET_INFO * cs __attribute__((unused)),
+ char * s __attribute__((unused)))
{
+ DBUG_ASSERT(0); /* flags any call; presumably active in debug builds only - confirm */
return 0;
}
-static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
- char *dst __attribute__((unused)),
- size_t dstlen __attribute__((unused)))
-{
- my_wc_t wc;
- int res;
- char *srcend= src + srclen;
- MY_UNICASE_INFO **uni_plane= cs->caseinfo;
- DBUG_ASSERT(src == dst && srclen == dstlen);
-
- while ((src < srcend) &&
- (res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
- {
- int plane= (wc>>8) & 0xFF;
- wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
- if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
- break;
- src+= res;
- }
- return srclen;
-}
-
-
-static size_t my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- char * s __attribute__((unused)))
+static size_t /* stub: ignores both arguments and returns 0 */
+my_casedn_str_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
+ char * s __attribute__((unused)))
{
+ DBUG_ASSERT(0); /* flags any call; presumably active in debug builds only - confirm */
return 0;
}
-static int my_strnncoll_ucs2(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
+static int /* stub: ignores all arguments and returns 0 */
+my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
+ const char *s __attribute__((unused)),
+ const char *t __attribute__((unused)))
{
- int s_res,t_res;
- my_wc_t UNINIT_VAR(s_wc),t_wc;
- const uchar *se=s+slen;
- const uchar *te=t+tlen;
- MY_UNICASE_INFO **uni_plane= cs->caseinfo;
-
- while ( s < se && t < te )
- {
- int plane;
- s_res=my_ucs2_uni(cs,&s_wc, s, se);
- t_res=my_ucs2_uni(cs,&t_wc, t, te);
-
- if ( s_res <= 0 || t_res <= 0 )
- {
- /* Incorrect string, compare by char value */
- return ((int)s[0]-(int)t[0]);
- }
-
- plane=(s_wc>>8) & 0xFF;
- s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
- plane=(t_wc>>8) & 0xFF;
- t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
- if ( s_wc != t_wc )
- {
- return s_wc > t_wc ? 1 : -1;
- }
-
- s+=s_res;
- t+=t_res;
- }
- return (int) (t_is_prefix ? t-te : ((se-s) - (te-t)));
-}
-
-/*
- Compare strings, discarding end space
-
- SYNOPSIS
- my_strnncollsp_ucs2()
- cs character set handler
- a First string to compare
- a_length Length of 'a'
- b Second string to compare
- b_length Length of 'b'
-
- IMPLEMENTATION
- If one string is shorter as the other, then we space extend the other
- so that the strings have equal length.
-
- This will ensure that the following things hold:
-
- "a" == "a "
- "a\0" < "a"
- "a\0" < "a "
-
- RETURN
- < 0 a < b
- = 0 a == b
- > 0 a > b
-*/
-
-static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool diff_if_only_endspace_difference
- __attribute__((unused)))
-{
- const uchar *se, *te;
- size_t minlen;
- MY_UNICASE_INFO **uni_plane= cs->caseinfo;
-
- /* extra safety to make sure the lengths are even numbers */
- slen&= ~1;
- tlen&= ~1;
-
- se= s + slen;
- te= t + tlen;
-
- for (minlen= min(slen, tlen); minlen; minlen-= 2)
- {
- int s_wc = uni_plane[s[0]] ? (int) uni_plane[s[0]][s[1]].sort :
- (((int) s[0]) << 8) + (int) s[1];
-
- int t_wc = uni_plane[t[0]] ? (int) uni_plane[t[0]][t[1]].sort :
- (((int) t[0]) << 8) + (int) t[1];
- if ( s_wc != t_wc )
- return s_wc > t_wc ? 1 : -1;
-
- s+= 2;
- t+= 2;
- }
-
- if (slen != tlen)
- {
- int swap= 1;
- if (slen < tlen)
- {
- s= t;
- se= te;
- swap= -1;
- }
-
- for ( ; s < se ; s+= 2)
- {
- if (s[0] || s[1] != ' ')
- return (s[0] == 0 && s[1] < ' ') ? -swap : swap;
- }
- }
+ DBUG_ASSERT(0); /* flags any call; presumably active in debug builds only - confirm */
return 0;
}
-static int my_strncasecmp_ucs2(CHARSET_INFO *cs,
- const char *s, const char *t, size_t len)
-{
- int s_res,t_res;
- my_wc_t UNINIT_VAR(s_wc),t_wc;
- const char *se=s+len;
- const char *te=t+len;
- MY_UNICASE_INFO **uni_plane= cs->caseinfo;
-
- while ( s < se && t < te )
- {
- int plane;
-
- s_res=my_ucs2_uni(cs,&s_wc, (const uchar*)s, (const uchar*)se);
- t_res=my_ucs2_uni(cs,&t_wc, (const uchar*)t, (const uchar*)te);
-
- if ( s_res <= 0 || t_res <= 0 )
- {
- /* Incorrect string, compare by char value */
- return ((int)s[0]-(int)t[0]);
- }
-
- plane=(s_wc>>8) & 0xFF;
- s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].tolower : s_wc;
-
- plane=(t_wc>>8) & 0xFF;
- t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].tolower : t_wc;
-
- if ( s_wc != t_wc )
- return ((int) s_wc) - ((int) t_wc);
-
- s+=s_res;
- t+=t_res;
- }
- return (int) ( (se-s) - (te-t) );
-}
-
-
-static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
-{
- size_t s_len= strlen(s);
- size_t t_len= strlen(t);
- size_t len = (s_len > t_len) ? s_len : t_len;
- return my_strncasecmp_ucs2(cs, s, t, len);
-}
-
-
-static size_t my_strnxfrm_ucs2(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen, const uchar *src,
- size_t srclen)
-{
- my_wc_t wc;
- int res;
- int plane;
- uchar *de = dst + dstlen;
- const uchar *se = src + srclen;
- MY_UNICASE_INFO **uni_plane= cs->caseinfo;
-
- while( src < se && dst < de )
- {
- if ((res=my_ucs2_uni(cs,&wc, src, se))<0)
- {
- break;
- }
- src+=res;
- srclen-=res;
-
- plane=(wc>>8) & 0xFF;
- wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
-
- if ((res=my_uni_ucs2(cs,wc,dst,de)) <0)
- {
- break;
- }
- dst+=res;
- }
- if (dst < de)
- cs->cset->fill(cs, (char*) dst, (size_t) (de - dst), ' ');
- return dstlen;
-}
-
-
-static uint my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- const char *b __attribute__((unused)),
- const char *e __attribute__((unused)))
-{
- return 2;
-}
-
-
-static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
- uint c __attribute__((unused)))
-{
- return 2;
-}
-
-
-static int my_vsnprintf_ucs2(char *dst, size_t n, const char* fmt, va_list ap)
+static long
+my_strntol_mb2_or_mb4(CHARSET_INFO *cs,
+ const char *nptr, size_t l, int base,
+ char **endptr, int *err)
{
- char *start=dst, *end=dst+n-1;
- for (; *fmt ; fmt++)
- {
- if (fmt[0] != '%')
- {
- if (dst == end) /* End of buffer */
- break;
-
- *dst++='\0'; *dst++= *fmt; /* Copy ordinary char */
- continue;
- }
-
- fmt++;
-
- /* Skip if max size is used (to be compatible with printf) */
- while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-')
- fmt++;
-
- if (*fmt == 'l')
- fmt++;
-
- if (*fmt == 's') /* String parameter */
- {
- reg2 char *par = va_arg(ap, char *);
- size_t plen;
- size_t left_len = (size_t)(end-dst);
- if (!par) par = (char*)"(null)";
- plen= strlen(par);
- if (left_len <= plen*2)
- plen = left_len/2 - 1;
-
- for ( ; plen ; plen--, dst+=2, par++)
- {
- dst[0]='\0';
- dst[1]=par[0];
- }
- continue;
- }
- else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
- {
- register int iarg;
- char nbuf[16];
- char *pbuf=nbuf;
-
- if ((size_t) (end-dst) < 32)
- break;
- iarg = va_arg(ap, int);
- if (*fmt == 'd')
- int10_to_str((long) iarg, nbuf, -10);
- else
- int10_to_str((long) (uint) iarg,nbuf,10);
-
- for (; pbuf[0]; pbuf++)
- {
- *dst++='\0';
- *dst++=*pbuf;
- }
- continue;
- }
-
- /* We come here on '%%', unknown code or too long parameter */
- if (dst == end)
- break;
- *dst++='\0';
- *dst++='%'; /* % used as % or unknown code */
- }
-
- DBUG_ASSERT(dst <= end);
- *dst='\0'; /* End of errmessage */
- return (size_t) (dst - start);
-}
-
-static size_t my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- char* to, size_t n, const char* fmt, ...)
-{
- va_list args;
- va_start(args,fmt);
- return my_vsnprintf_ucs2(to, n, fmt, args);
-}
-
-
-long my_strntol_ucs2(CHARSET_INFO *cs,
- const char *nptr, size_t l, int base,
- char **endptr, int *err)
-{
- int negative=0;
+ int negative= 0;
int overflow;
int cnv;
my_wc_t wc;
@@ -511,7 +113,7 @@ long my_strntol_ucs2(CHARSET_INFO *cs,
*err= 0;
do
{
- if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
+ if ((cnv= cs->cset->mb_wc(cs, &wc, s, e))>0)
{
switch (wc)
{
@@ -524,12 +126,12 @@ long my_strntol_ucs2(CHARSET_INFO *cs,
}
else /* No more characters or bad multibyte sequence */
{
- if (endptr !=NULL )
- *endptr = (char*)s;
- err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
+ if (endptr != NULL )
+ *endptr= (char*) s;
+ err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
return 0;
}
- s+=cnv;
+ s+= cnv;
} while (1);
bs:
@@ -539,39 +141,39 @@ bs:
base = 10;
#endif
- overflow = 0;
- res = 0;
- save = s;
- cutoff = ((uint32)~0L) / (uint32) base;
- cutlim = (uint) (((uint32)~0L) % (uint32) base);
+ overflow= 0;
+ res= 0;
+ save= s;
+ cutoff= ((uint32)~0L) / (uint32) base;
+ cutlim= (uint) (((uint32)~0L) % (uint32) base);
do {
- if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
+ if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
{
- s+=cnv;
- if ( wc>='0' && wc<='9')
- wc -= '0';
- else if ( wc>='A' && wc<='Z')
- wc = wc - 'A' + 10;
- else if ( wc>='a' && wc<='z')
- wc = wc - 'a' + 10;
+ s+= cnv;
+ if (wc >= '0' && wc <= '9')
+ wc-= '0';
+ else if (wc >= 'A' && wc <= 'Z')
+ wc= wc - 'A' + 10;
+ else if (wc >= 'a' && wc <= 'z')
+ wc= wc - 'a' + 10;
else
break;
if ((int)wc >= base)
break;
if (res > cutoff || (res == cutoff && wc > cutlim))
- overflow = 1;
+ overflow= 1;
else
{
- res *= (uint32) base;
- res += wc;
+ res*= (uint32) base;
+ res+= wc;
}
}
- else if (cnv==MY_CS_ILSEQ)
+ else if (cnv == MY_CS_ILSEQ)
{
if (endptr !=NULL )
- *endptr = (char*)s;
- err[0]=EILSEQ;
+ *endptr = (char*) s;
+ err[0]= EILSEQ;
return 0;
}
else
@@ -586,21 +188,21 @@ bs:
if (s == save)
{
- err[0]=EDOM;
+ err[0]= EDOM;
return 0L;
}
if (negative)
{
if (res > (uint32) INT_MIN32)
- overflow = 1;
+ overflow= 1;
}
else if (res > INT_MAX32)
- overflow = 1;
+ overflow= 1;
if (overflow)
{
- err[0]=ERANGE;
+ err[0]= ERANGE;
return negative ? INT_MIN32 : INT_MAX32;
}
@@ -608,11 +210,12 @@ bs:
}
-ulong my_strntoul_ucs2(CHARSET_INFO *cs,
- const char *nptr, size_t l, int base,
- char **endptr, int *err)
+static ulong
+my_strntoul_mb2_or_mb4(CHARSET_INFO *cs,
+ const char *nptr, size_t l, int base,
+ char **endptr, int *err)
{
- int negative=0;
+ int negative= 0;
int overflow;
int cnv;
my_wc_t wc;
@@ -620,13 +223,13 @@ ulong my_strntoul_ucs2(CHARSET_INFO *cs,
register uint32 cutoff;
register uint32 res;
register const uchar *s= (const uchar*) nptr;
- register const uchar *e= (const uchar*) nptr+l;
+ register const uchar *e= (const uchar*) nptr + l;
const uchar *save;
*err= 0;
do
{
- if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
+ if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
{
switch (wc)
{
@@ -640,11 +243,11 @@ ulong my_strntoul_ucs2(CHARSET_INFO *cs,
else /* No more characters or bad multibyte sequence */
{
if (endptr !=NULL )
- *endptr = (char*)s;
- err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
+ *endptr= (char*)s;
+ err[0]= (cnv == MY_CS_ILSEQ) ? EILSEQ : EDOM;
return 0;
}
- s+=cnv;
+ s+= cnv;
} while (1);
bs:
@@ -654,40 +257,40 @@ bs:
base = 10;
#endif
- overflow = 0;
- res = 0;
- save = s;
- cutoff = ((uint32)~0L) / (uint32) base;
- cutlim = (uint) (((uint32)~0L) % (uint32) base);
+ overflow= 0;
+ res= 0;
+ save= s;
+ cutoff= ((uint32)~0L) / (uint32) base;
+ cutlim= (uint) (((uint32)~0L) % (uint32) base);
do
{
- if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
+ if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
{
- s+=cnv;
- if ( wc>='0' && wc<='9')
- wc -= '0';
- else if ( wc>='A' && wc<='Z')
- wc = wc - 'A' + 10;
- else if ( wc>='a' && wc<='z')
- wc = wc - 'a' + 10;
+ s+= cnv;
+ if (wc >= '0' && wc <= '9')
+ wc-= '0';
+ else if (wc >= 'A' && wc <= 'Z')
+ wc= wc - 'A' + 10;
+ else if (wc >= 'a' && wc <= 'z')
+ wc= wc - 'a' + 10;
else
break;
- if ((int)wc >= base)
+ if ((int) wc >= base)
break;
if (res > cutoff || (res == cutoff && wc > cutlim))
overflow = 1;
else
{
- res *= (uint32) base;
- res += wc;
+ res*= (uint32) base;
+ res+= wc;
}
}
- else if (cnv==MY_CS_ILSEQ)
+ else if (cnv == MY_CS_ILSEQ)
{
- if (endptr !=NULL )
- *endptr = (char*)s;
- err[0]=EILSEQ;
+ if (endptr != NULL )
+ *endptr= (char*)s;
+ err[0]= EILSEQ;
return 0;
}
else
@@ -698,17 +301,17 @@ bs:
} while(1);
if (endptr != NULL)
- *endptr = (char *) s;
+ *endptr= (char *) s;
if (s == save)
{
- err[0]=EDOM;
+ err[0]= EDOM;
return 0L;
}
if (overflow)
{
- err[0]=(ERANGE);
+ err[0]= (ERANGE);
return (~(uint32) 0);
}
@@ -716,10 +319,10 @@ bs:
}
-
-longlong my_strntoll_ucs2(CHARSET_INFO *cs,
- const char *nptr, size_t l, int base,
- char **endptr, int *err)
+static longlong
+my_strntoll_mb2_or_mb4(CHARSET_INFO *cs,
+ const char *nptr, size_t l, int base,
+ char **endptr, int *err)
{
int negative=0;
int overflow;
@@ -832,13 +435,12 @@ bs:
}
-
-
-ulonglong my_strntoull_ucs2(CHARSET_INFO *cs,
- const char *nptr, size_t l, int base,
- char **endptr, int *err)
+static ulonglong
+my_strntoull_mb2_or_mb4(CHARSET_INFO *cs,
+ const char *nptr, size_t l, int base,
+ char **endptr, int *err)
{
- int negative=0;
+ int negative= 0;
int overflow;
int cnv;
my_wc_t wc;
@@ -846,13 +448,13 @@ ulonglong my_strntoull_ucs2(CHARSET_INFO *cs,
register unsigned int cutlim;
register ulonglong res;
register const uchar *s= (const uchar*) nptr;
- register const uchar *e= (const uchar*) nptr+l;
+ register const uchar *e= (const uchar*) nptr + l;
const uchar *save;
*err= 0;
do
{
- if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
+ if ((cnv= cs->cset->mb_wc(cs,&wc,s,e)) > 0)
{
switch (wc)
{
@@ -942,49 +544,51 @@ bs:
}
-double my_strntod_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- char *nptr, size_t length,
- char **endptr, int *err)
+static double
+my_strntod_mb2_or_mb4(CHARSET_INFO *cs,
+ char *nptr, size_t length,
+ char **endptr, int *err)
{
char buf[256];
double res;
- register char *b=buf;
+ register char *b= buf;
register const uchar *s= (const uchar*) nptr;
const uchar *end;
my_wc_t wc;
- int cnv;
+ int cnv;
*err= 0;
/* Cut too long strings */
if (length >= sizeof(buf))
- length= sizeof(buf)-1;
- end= s+length;
+ length= sizeof(buf) - 1;
+ end= s + length;
- while ((cnv=cs->cset->mb_wc(cs,&wc,s,end)) > 0)
+ while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0)
{
- s+=cnv;
+ s+= cnv;
if (wc > (int) (uchar) 'e' || !wc)
- break; /* Can't be part of double */
+ break; /* Can't be part of double */
*b++= (char) wc;
}
*endptr= b;
res= my_strtod(buf, endptr, err);
- *endptr= nptr + (size_t) (*endptr- buf);
+ *endptr= nptr + cs->mbminlen * (size_t) (*endptr - buf);
return res;
}
-ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- const char *nptr, size_t length,
- int unsign_fl,
- char **endptr, int *err)
+static ulonglong
+my_strntoull10rnd_mb2_or_mb4(CHARSET_INFO *cs,
+ const char *nptr, size_t length,
+ int unsign_fl,
+ char **endptr, int *err)
{
- char buf[256], *b= buf;
+ char buf[256], *b= buf;
ulonglong res;
const uchar *end, *s= (const uchar*) nptr;
my_wc_t wc;
- int cnv;
+ int cnv;
/* Cut too long strings */
if (length >= sizeof(buf))
@@ -1000,7 +604,7 @@ ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err);
- *endptr= (char*) nptr + 2 * (size_t) (*endptr- buf);
+ *endptr= (char*) nptr + cs->mbminlen * (size_t) (*endptr - buf);
return res;
}
@@ -1009,23 +613,24 @@ ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs __attribute__((unused)),
This is a fast version optimized for the case of radix 10 / -10
*/
-size_t my_l10tostr_ucs2(CHARSET_INFO *cs,
- char *dst, size_t len, int radix, long int val)
+static size_t
+my_l10tostr_mb2_or_mb4(CHARSET_INFO *cs,
+ char *dst, size_t len, int radix, long int val)
{
char buffer[66];
register char *p, *db, *de;
long int new_val;
- int sl=0;
+ int sl= 0;
unsigned long int uval = (unsigned long int) val;
- p = &buffer[sizeof(buffer)-1];
- *p='\0';
+ p= &buffer[sizeof(buffer) - 1];
+ *p= '\0';
if (radix < 0)
{
if (val < 0)
{
- sl = 1;
+ sl= 1;
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
uval = (unsigned long int)0 - uval;
}
@@ -1033,57 +638,58 @@ size_t my_l10tostr_ucs2(CHARSET_INFO *cs,
new_val = (long) (uval / 10);
*--p = '0'+ (char) (uval - (unsigned long) new_val * 10);
- val = new_val;
+ val= new_val;
while (val != 0)
{
- new_val=val/10;
- *--p = '0' + (char) (val-new_val*10);
+ new_val= val / 10;
+ *--p= '0' + (char) (val - new_val * 10);
val= new_val;
}
if (sl)
{
- *--p='-';
+ *--p= '-';
}
- for ( db=dst, de=dst+len ; (dst<de) && *p ; p++)
+ for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
{
- int cnvres=cs->cset->wc_mb(cs,(my_wc_t)p[0],(uchar*) dst, (uchar*) de);
- if (cnvres>0)
- dst+=cnvres;
+ int cnvres= cs->cset->wc_mb(cs,(my_wc_t)p[0],(uchar*) dst, (uchar*) de);
+ if (cnvres > 0)
+ dst+= cnvres;
else
break;
}
- return (int) (dst-db);
+ return (int) (dst - db);
}
-size_t my_ll10tostr_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- char *dst, size_t len, int radix, longlong val)
+static size_t
+my_ll10tostr_mb2_or_mb4(CHARSET_INFO *cs,
+ char *dst, size_t len, int radix, longlong val)
{
char buffer[65];
register char *p, *db, *de;
long long_val;
- int sl=0;
+ int sl= 0;
ulonglong uval= (ulonglong) val;
if (radix < 0)
{
if (val < 0)
{
- sl=1;
+ sl= 1;
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
uval = (ulonglong)0 - uval;
}
}
- p = &buffer[sizeof(buffer)-1];
+ p= &buffer[sizeof(buffer)-1];
*p='\0';
if (uval == 0)
{
- *--p='0';
+ *--p= '0';
goto cnv;
}
@@ -1091,7 +697,7 @@ size_t my_ll10tostr_ucs2(CHARSET_INFO *cs __attribute__((unused)),
{
ulonglong quo= uval/(uint) 10;
uint rem= (uint) (uval- quo* (uint) 10);
- *--p = '0' + rem;
+ *--p= '0' + rem;
uval= quo;
}
@@ -1099,44 +705,34 @@ size_t my_ll10tostr_ucs2(CHARSET_INFO *cs __attribute__((unused)),
while (long_val != 0)
{
long quo= long_val/10;
- *--p = (char) ('0' + (long_val - quo*10));
+ *--p= (char) ('0' + (long_val - quo*10));
long_val= quo;
}
cnv:
if (sl)
{
- *--p='-';
+ *--p= '-';
}
- for ( db=dst, de=dst+len ; (dst<de) && *p ; p++)
+ for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
{
- int cnvres=cs->cset->wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de);
- if (cnvres>0)
- dst+=cnvres;
+ int cnvres= cs->cset->wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de);
+ if (cnvres > 0)
+ dst+= cnvres;
else
break;
}
- return (int) (dst-db);
+ return (int) (dst -db);
}
+#endif /* HAVE_CHARSET_mb2_or_mb4 */
-#undef ULONGLONG_MAX
-#define ULONGLONG_MAX (~(ulonglong) 0)
-#define MAX_NEGATIVE_NUMBER ((ulonglong) LL(0x8000000000000000))
-#define INIT_CNT 9
-#define LFACTOR ULL(1000000000)
-#define LFACTOR1 ULL(10000000000)
-#define LFACTOR2 ULL(100000000000)
-static unsigned long lfactor[9]=
-{
- 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L
-};
-
-
-longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- const char *nptr, char **endptr, int *error)
+#ifdef HAVE_CHARSET_mb2
+static longlong
+my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)),
+ const char *nptr, char **endptr, int *error)
{
const char *s, *end, *start, *n_end, *true_end;
uchar c;
@@ -1162,11 +758,11 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)),
goto no_conv;
}
- /* Check for a sign. */
+ /* Check for a sign. */
negative= 0;
if (!s[0] && s[1] == '-')
{
- *error= -1; /* Mark as negative number */
+ *error= -1; /* Mark as negative number */
negative= 1;
s+= 2;
if (s == end)
@@ -1182,7 +778,7 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)),
{
s+= 2;
if (s == end)
- goto no_conv;
+ goto no_conv;
}
cutoff= ULONGLONG_MAX / LFACTOR2;
cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
@@ -1197,7 +793,7 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)),
{
s+= 2;
if (s == end)
- goto end_i; /* Return 0 */
+ goto end_i; /* Return 0 */
}
while (!s[0] && s[1] == '0');
n_end= s + 2 * INIT_CNT;
@@ -1226,7 +822,7 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)),
/* Handle next 9 digits and store them in j */
j= 0;
- start= s; /* Used to know how much to shift i */
+ start= s; /* Used to know how much to shift i */
n_end= true_end= s + 2 * INIT_CNT;
if (n_end > end)
n_end= end;
@@ -1266,7 +862,7 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)),
li=i*LFACTOR2+ (ulonglong) j*100 + k;
return (longlong) li;
-overflow: /* *endptr is set here */
+overflow: /* *endptr is set here */
*error= MY_ERRNO_ERANGE;
return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
@@ -1303,6 +899,2242 @@ no_conv:
}
+static size_t
+my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
+ const char *str, const char *end, int sequence_type)
+{
+ const char *str0= str;
+ end--; /* for easier loop condition, because of two bytes per character */
+
+ switch (sequence_type)
+ {
+ case MY_SEQ_SPACES:
+ for ( ; str < end; str+= 2)
+ {
+ if (str[0] != '\0' || str[1] != ' ')
+ break;
+ }
+ return (size_t) (str - str0);
+ default:
+ return 0;
+ }
+}
+
+
+static void
+my_fill_mb2(CHARSET_INFO *cs __attribute__((unused)),
+ char *s, size_t l, int fill)
+{
+ for ( ; l >= 2; s[0]= 0, s[1]= fill, s+= 2, l-= 2);
+}
+
+
+static int
+my_vsnprintf_mb2(char *dst, size_t n, const char* fmt, va_list ap)
+{
+ char *start=dst, *end= dst + n - 1;
+ for (; *fmt ; fmt++)
+ {
+ if (fmt[0] != '%')
+ {
+ if (dst == end) /* End of buffer */
+ break;
+
+ *dst++='\0';
+ *dst++= *fmt; /* Copy ordinary char */
+ continue;
+ }
+
+ fmt++;
+
+ /* Skip if max size is used (to be compatible with printf) */
+ while ( (*fmt >= '0' && *fmt <= '9') || *fmt == '.' || *fmt == '-')
+ fmt++;
+
+ if (*fmt == 'l')
+ fmt++;
+
+ if (*fmt == 's') /* String parameter */
+ {
+ char *par= va_arg(ap, char *);
+ size_t plen;
+ size_t left_len= (size_t)(end-dst);
+ if (!par)
+ par= (char*) "(null)";
+ plen= strlen(par);
+ if (left_len <= plen * 2)
+ plen = left_len / 2 - 1;
+
+ for ( ; plen ; plen--, dst+=2, par++)
+ {
+ dst[0]= '\0';
+ dst[1]= par[0];
+ }
+ continue;
+ }
+ else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
+ {
+ int iarg;
+ char nbuf[16];
+ char *pbuf= nbuf;
+
+ if ((size_t) (end - dst) < 32)
+ break;
+ iarg= va_arg(ap, int);
+ if (*fmt == 'd')
+ int10_to_str((long) iarg, nbuf, -10);
+ else
+ int10_to_str((long) (uint) iarg, nbuf,10);
+
+ for (; pbuf[0]; pbuf++)
+ {
+ *dst++= '\0';
+ *dst++= *pbuf;
+ }
+ continue;
+ }
+
+ /* We come here on '%%', unknown code or too long parameter */
+ if (dst == end)
+ break;
+ *dst++= '\0';
+ *dst++= '%'; /* % used as % or unknown code */
+ }
+
+ DBUG_ASSERT(dst <= end);
+ *dst='\0'; /* End of errmessage */
+ return (size_t) (dst - start);
+}
+
+
+static size_t
+my_snprintf_mb2(CHARSET_INFO *cs __attribute__((unused)),
+ char* to, size_t n, const char* fmt, ...)
+{
+ va_list args;
+ va_start(args,fmt);
+ return my_vsnprintf_mb2(to, n, fmt, args);
+}
+
+
+static size_t
+my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
+ const char *ptr, size_t length)
+{
+ const char *end= ptr + length;
+ while (end > ptr + 1 && end[-1] == ' ' && end[-2] == '\0')
+ end-= 2;
+ return (size_t) (end - ptr);
+}
+
+#endif /* HAVE_CHARSET_mb2*/
+
+
+
+
+#ifdef HAVE_CHARSET_utf16
+
+/*
+ D800..DB7F - Non-private surrogate high (896 pages)
+ DB80..DBFF - Private surrogate high (128 pages)
+ DC00..DFFF - Surrogate low (1024 codes in a page)
+*/
+
+#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8)
+#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC)
+#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800)
+
+static int
+my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t *pwc, const uchar *s, const uchar *e)
+{
+ if (s + 2 > e)
+ return MY_CS_TOOSMALL2;
+
+ /*
+ High bytes: 0xD[89AB] = B'110110??'
+ Low bytes: 0xD[CDEF] = B'110111??'
+ Surrogate mask: 0xFC = B'11111100'
+ */
+
+ if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
+ {
+ if (s + 4 > e)
+ return MY_CS_TOOSMALL4;
+
+ if (!MY_UTF16_LOW_HEAD(s[2])) /* Broken surrogate pair */
+ return MY_CS_ILSEQ;
+
+ /*
+ s[0]= 110110?? (<< 18)
+ s[1]= ???????? (<< 10)
+ s[2]= 110111?? (<< 8)
+ s[3]= ???????? (<< 0)
+ */
+
+ *pwc= ((s[0] & 3) << 18) + (s[1] << 10) +
+ ((s[2] & 3) << 8) + s[3] + 0x10000;
+
+ return 4;
+ }
+
+ if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
+ return MY_CS_ILSEQ;
+
+ *pwc= (s[0] << 8) + s[1];
+ return 2;
+}
+
+
+static int
+my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t wc, uchar *s, uchar *e)
+{
+ if (wc <= 0xFFFF)
+ {
+ if (s + 2 > e)
+ return MY_CS_TOOSMALL2;
+ if (MY_UTF16_SURROGATE(wc))
+ return MY_CS_ILUNI;
+ *s++= (uchar) (wc >> 8);
+ *s= (uchar) (wc & 0xFF);
+ return 2;
+ }
+
+ if (wc <= 0x10FFFF)
+ {
+ if (s + 4 > e)
+ return MY_CS_TOOSMALL4;
+ *s++= (uchar) ((wc-= 0x10000) >> 18) | 0xD8;
+ *s++= (uchar) (wc >> 10) & 0xFF;
+ *s++= (uchar) ((wc >> 8) & 3) | 0xDC;
+ *s= (uchar) wc & 0xFF;
+ return 4;
+ }
+
+ return MY_CS_ILUNI;
+}
+
+
+static inline void
+my_tolower_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
+{
+ int page= *wc >> 8;
+ if (page < 256 && uni_plane[page])
+ *wc= uni_plane[page][*wc & 0xFF].tolower;
+}
+
+
+static inline void
+my_toupper_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
+{
+ int page= *wc >> 8;
+ if (page < 256 && uni_plane[page])
+ *wc= uni_plane[page][*wc & 0xFF].toupper;
+}
+
+
+static inline void
+my_tosort_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
+{
+ int page= *wc >> 8;
+ if (page < 256)
+ {
+ if (uni_plane[page])
+ *wc= uni_plane[page][*wc & 0xFF].sort;
+ }
+ else
+ {
+ *wc= REPLACEMENT_CHAR;
+ }
+}
+
+
+static size_t
+my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
+ char *dst __attribute__((unused)),
+ size_t dstlen __attribute__((unused)))
+{
+ my_wc_t wc;
+ int res;
+ char *srcend= src + srclen;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ DBUG_ASSERT(src == dst && srclen == dstlen);
+
+ while ((src < srcend) &&
+ (res= my_utf16_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
+ {
+ my_toupper_utf16(uni_plane, &wc);
+ if (res != my_uni_utf16(cs, wc, (uchar*) src, (uchar*) srcend))
+ break;
+ src+= res;
+ }
+ return srclen;
+}
+
+
+static void
+my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
+ ulong *n1, ulong *n2)
+{
+ my_wc_t wc;
+ int res;
+ const uchar *e= s+slen;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+
+ while (e > s + 1 && e[-1] == ' ' && e[-2] == '\0')
+ e-= 2;
+
+ while ((s < e) && (res= my_utf16_uni(cs, &wc, (uchar *)s, (uchar*)e)) > 0)
+ {
+ my_tosort_utf16(uni_plane, &wc);
+ n1[0]^= (((n1[0] & 63) + n2[0]) * (wc & 0xFF)) + (n1[0] << 8);
+ n2[0]+= 3;
+ n1[0]^= (((n1[0] & 63) + n2[0]) * (wc >> 8)) + (n1[0] << 8);
+ n2[0]+= 3;
+ s+= res;
+ }
+}
+
+
+static size_t
+my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
+ char *dst __attribute__((unused)),
+ size_t dstlen __attribute__((unused)))
+{
+ my_wc_t wc;
+ int res;
+ char *srcend= src + srclen;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ DBUG_ASSERT(src == dst && srclen == dstlen);
+
+ while ((src < srcend) &&
+ (res= my_utf16_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
+ {
+ my_tolower_utf16(uni_plane, &wc);
+ if (res != my_uni_utf16(cs, wc, (uchar*) src, (uchar*) srcend))
+ break;
+ src+= res;
+ }
+ return srclen;
+}
+
+
+static int
+my_strnncoll_utf16(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool t_is_prefix)
+{
+ int s_res, t_res;
+ my_wc_t s_wc,t_wc;
+ const uchar *se= s + slen;
+ const uchar *te= t + tlen;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ LINT_INIT(s_wc);
+ LINT_INIT(t_wc);
+
+ while (s < se && t < te)
+ {
+ s_res= my_utf16_uni(cs, &s_wc, s, se);
+ t_res= my_utf16_uni(cs, &t_wc, t, te);
+
+ if (s_res <= 0 || t_res <= 0)
+ {
+ /* Incorrect string, compare by char value */
+ return my_bincmp(s, se, t, te);
+ }
+
+ my_tosort_utf16(uni_plane, &s_wc);
+ my_tosort_utf16(uni_plane, &t_wc);
+
+ if (s_wc != t_wc)
+ {
+ return s_wc > t_wc ? 1 : -1;
+ }
+
+ s+= s_res;
+ t+= t_res;
+ }
+ return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
+}
+
+
+/**
+ Compare strings, discarding end space
+
+ If one string is shorter than the other, then we space-extend the shorter one
+ so that the strings have equal length.
+
+ This will ensure that the following things hold:
+
+ "a" == "a "
+ "a\0" < "a"
+ "a\0" < "a "
+
+ @param cs Character set pointer.
+ @param a First string to compare.
+ @param a_length Length of 'a'.
+ @param b Second string to compare.
+ @param b_length Length of 'b'.
+
+ IMPLEMENTATION
+
+ @return Comparison result.
+ @retval Negative number, if a less than b.
+ @retval 0, if a is equal to b
+ @retval Positive number, if a > b
+*/
+
+static int
+my_strnncollsp_utf16(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool diff_if_only_endspace_difference)
+{
+ int res;
+ my_wc_t s_wc, t_wc;
+ const uchar *se= s + slen, *te= t + tlen;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ LINT_INIT(s_wc);
+ LINT_INIT(t_wc);
+
+ DBUG_ASSERT((slen % 2) == 0);
+ DBUG_ASSERT((tlen % 2) == 0);
+
+#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
+ diff_if_only_endspace_difference= FALSE;
+#endif
+
+ while (s < se && t < te)
+ {
+ int s_res= my_utf16_uni(cs, &s_wc, s, se);
+ int t_res= my_utf16_uni(cs, &t_wc, t, te);
+
+ if (s_res <= 0 || t_res <= 0)
+ {
+ /* Incorrect string, compare bytewise */
+ return my_bincmp(s, se, t, te);
+ }
+
+ my_tosort_utf16(uni_plane, &s_wc);
+ my_tosort_utf16(uni_plane, &t_wc);
+
+ if (s_wc != t_wc)
+ {
+ return s_wc > t_wc ? 1 : -1;
+ }
+
+ s+= s_res;
+ t+= t_res;
+ }
+
+ slen= (size_t) (se - s);
+ tlen= (size_t) (te - t);
+ res= 0;
+
+ if (slen != tlen)
+ {
+ int s_res, swap= 1;
+ if (diff_if_only_endspace_difference)
+ res= 1; /* Assume 's' is bigger */
+ if (slen < tlen)
+ {
+ slen= tlen;
+ s= t;
+ se= te;
+ swap= -1;
+ res= -res;
+ }
+
+ for ( ; s < se; s+= s_res)
+ {
+ if ((s_res= my_utf16_uni(cs, &s_wc, s, se)) < 0)
+ {
+ DBUG_ASSERT(0);
+ return 0;
+ }
+ if (s_wc != ' ')
+ return (s_wc < ' ') ? -swap : swap;
+ }
+ }
+ return res;
+}
+
+
+static uint
+my_ismbchar_utf16(CHARSET_INFO *cs __attribute__((unused)),
+ const char *b __attribute__((unused)),
+ const char *e __attribute__((unused)))
+{
+ if (b + 2 > e)
+ return 0;
+
+ if (MY_UTF16_HIGH_HEAD(*b))
+ {
+ return (b + 4 <= e) && MY_UTF16_LOW_HEAD(b[2]) ? 4 : 0;
+ }
+
+ if (MY_UTF16_LOW_HEAD(*b))
+ return 0;
+
+ return 2;
+}
+
+
+static uint
+my_mbcharlen_utf16(CHARSET_INFO *cs __attribute__((unused)),
+ uint c __attribute__((unused)))
+{
+ return MY_UTF16_HIGH_HEAD(c) ? 4 : 2;
+}
+
+
+static size_t
+my_numchars_utf16(CHARSET_INFO *cs,
+ const char *b, const char *e)
+{
+ size_t nchars= 0;
+ for ( ; ; nchars++)
+ {
+ size_t charlen= my_ismbchar_utf16(cs, b, e);
+ if (!charlen)
+ break;
+ b+= charlen;
+ }
+ return nchars;
+}
+
+
+static size_t
+my_charpos_utf16(CHARSET_INFO *cs,
+ const char *b, const char *e, size_t pos)
+{
+ const char *b0= b;
+ uint charlen;
+
+ for ( ; pos; b+= charlen, pos--)
+ {
+ if (!(charlen= my_ismbchar(cs, b, e)))
+ return (e + 2 - b0); /* Error, return pos outside the string */
+ }
+ return (size_t) (pos ? (e + 2 - b0) : (b - b0));
+}
+
+
+static size_t
+my_well_formed_len_utf16(CHARSET_INFO *cs,
+ const char *b, const char *e,
+ size_t nchars, int *error)
+{
+ const char *b0= b;
+ uint charlen;
+ *error= 0;
+
+ for ( ; nchars; b+= charlen, nchars--)
+ {
+ if (!(charlen= my_ismbchar(cs, b, e)))
+ {
+ *error= b < e ? 1 : 0;
+ break;
+ }
+ }
+ return (size_t) (b - b0);
+}
+
+
+static int
+my_wildcmp_utf16_ci(CHARSET_INFO *cs,
+ const char *str,const char *str_end,
+ const char *wildstr,const char *wildend,
+ int escape, int w_one, int w_many)
+{
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
+ escape, w_one, w_many, uni_plane);
+}
+
+
+static int
+my_wildcmp_utf16_bin(CHARSET_INFO *cs,
+ const char *str,const char *str_end,
+ const char *wildstr,const char *wildend,
+ int escape, int w_one, int w_many)
+{
+ return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
+ escape, w_one, w_many, NULL);
+}
+
+
+static int
+my_strnncoll_utf16_bin(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool t_is_prefix)
+{
+ int s_res,t_res;
+ my_wc_t s_wc,t_wc;
+ const uchar *se=s+slen;
+ const uchar *te=t+tlen;
+ LINT_INIT(s_wc);
+ LINT_INIT(t_wc);
+
+ while ( s < se && t < te )
+ {
+ s_res= my_utf16_uni(cs,&s_wc, s, se);
+ t_res= my_utf16_uni(cs,&t_wc, t, te);
+
+ if (s_res <= 0 || t_res <= 0)
+ {
+ /* Incorrect string, compare by char value */
+ return my_bincmp(s, se, t, te);
+ }
+ if (s_wc != t_wc)
+ {
+ return s_wc > t_wc ? 1 : -1;
+ }
+
+ s+= s_res;
+ t+= t_res;
+ }
+ return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
+}
+
+
+static int
+my_strnncollsp_utf16_bin(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool diff_if_only_endspace_difference)
+{
+ int res;
+ my_wc_t s_wc, t_wc;
+ const uchar *se= s + slen, *te= t + tlen;
+ LINT_INIT(s_wc);
+ LINT_INIT(t_wc);
+
+ DBUG_ASSERT((slen % 2) == 0);
+ DBUG_ASSERT((tlen % 2) == 0);
+
+#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
+ diff_if_only_endspace_difference= FALSE;
+#endif
+
+ while (s < se && t < te)
+ {
+ int s_res= my_utf16_uni(cs, &s_wc, s, se);
+ int t_res= my_utf16_uni(cs, &t_wc, t, te);
+
+ if (s_res <= 0 || t_res <= 0)
+ {
+ /* Incorrect string, compare bytewise */
+ return my_bincmp(s, se, t, te);
+ }
+
+ if (s_wc != t_wc)
+ {
+ return s_wc > t_wc ? 1 : -1;
+ }
+
+ s+= s_res;
+ t+= t_res;
+ }
+
+ slen= (size_t) (se - s);
+ tlen= (size_t) (te - t);
+ res= 0;
+
+ if (slen != tlen)
+ {
+ int s_res, swap= 1;
+ if (diff_if_only_endspace_difference)
+ res= 1; /* Assume 's' is bigger */
+ if (slen < tlen)
+ {
+ slen= tlen;
+ s= t;
+ se= te;
+ swap= -1;
+ res= -res;
+ }
+
+ for ( ; s < se; s+= s_res)
+ {
+ if ((s_res= my_utf16_uni(cs, &s_wc, s, se)) < 0)
+ {
+ DBUG_ASSERT(0);
+ return 0;
+ }
+ if (s_wc != ' ')
+ return (s_wc < ' ') ? -swap : swap;
+ }
+ }
+ return res;
+}
+
+
+static void
+my_hash_sort_utf16_bin(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *key, size_t len,ulong *nr1, ulong *nr2)
+{
+ const uchar *pos = key;
+
+ key+= len;
+
+ while (key > pos + 1 && key[-1] == ' ' && key[-2] == '\0')
+ key-= 2;
+
+ for (; pos < (uchar*) key ; pos++)
+ {
+ nr1[0]^= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) *
+ ((uint)*pos)) + (nr1[0] << 8);
+ nr2[0]+= 3;
+ }
+}
+
+
+/**
+ Calculate min_str and max_str that ranges a LIKE string.
+
+ @param ptr Pointer to LIKE pattern.
+ @param ptr_length Length of LIKE pattern.
+ @param escape Escape character in LIKE. (Normally '\').
+ All escape characters should be removed
+ from min_str and max_str.
+ @param res_length Length of min_str and max_str.
+ @param min_str Smallest case sensitive string that ranges LIKE.
+ Should be space padded to res_length.
+ @param max_str Largest case sensitive string that ranges LIKE.
+ Normally padded with the biggest character sort value.
+
+ @return Optimization status.
+ @retval FALSE if LIKE pattern can be optimized
+ @retval TRUE if LIKE can't be optimized.
+*/
+
+my_bool
+my_like_range_utf16(CHARSET_INFO *cs,
+ const char *ptr, size_t ptr_length,
+ pbool escape, pbool w_one, pbool w_many,
+ size_t res_length,
+ char *min_str,char *max_str,
+ size_t *min_length,size_t *max_length)
+{
+ const char *end=ptr+ptr_length;
+ char *min_org=min_str;
+ char *min_end=min_str+res_length;
+ size_t charlen= res_length / cs->mbmaxlen;
+
+ for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
+ ; ptr+=2, charlen--)
+ {
+ if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
+ {
+ ptr+=2; /* Skip escape */
+ *min_str++= *max_str++ = ptr[0];
+ *min_str++= *max_str++ = ptr[1];
+ continue;
+ }
+ if (ptr[0] == '\0' && ptr[1] == w_one) /* '_' in SQL */
+ {
+ *min_str++= (char) (cs->min_sort_char >> 8);
+ *min_str++= (char) (cs->min_sort_char & 255);
+ *max_str++= (char) (cs->max_sort_char >> 8);
+ *max_str++= (char) (cs->max_sort_char & 255);
+ continue;
+ }
+ if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */
+ {
+ /*
+ Calculate length of keys:
+ 'a\0\0...' is the smallest possible string when we have space expansion
+ 'a\ff\ff...' is the biggest possible string
+ */
+ *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
+ res_length);
+ *max_length= res_length;
+ do {
+ *min_str++ = 0;
+ *min_str++ = 0;
+ *max_str++ = (char) (cs->max_sort_char >> 8);
+ *max_str++ = (char) (cs->max_sort_char & 255);
+ } while (min_str + 1 < min_end);
+ return FALSE;
+ }
+ *min_str++= *max_str++ = ptr[0];
+ *min_str++= *max_str++ = ptr[1];
+ }
+
+ /* Temporary fix for handling w_one at end of string (key compression) */
+ {
+ char *tmp;
+ for (tmp= min_str ; tmp-1 > min_org && tmp[-1] == '\0' && tmp[-2]=='\0';)
+ {
+ *--tmp=' ';
+ *--tmp='\0';
+ }
+ }
+
+ *min_length= *max_length = (size_t) (min_str - min_org);
+ while (min_str + 1 < min_end)
+ {
+ *min_str++ = *max_str++ = '\0';
+ *min_str++ = *max_str++ = ' '; /* Because of key compression */
+ }
+ return FALSE;
+}
+
+
+static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
+{
+ NULL, /* init */
+ my_strnncoll_utf16,
+ my_strnncollsp_utf16,
+ my_strnxfrm_unicode,
+ my_strnxfrmlen_simple,
+ my_like_range_utf16,
+ my_wildcmp_utf16_ci,
+ my_strcasecmp_mb2_or_mb4,
+ my_instr_mb,
+ my_hash_sort_utf16,
+ my_propagate_simple
+};
+
+
+static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
+{
+ NULL, /* init */
+ my_strnncoll_utf16_bin,
+ my_strnncollsp_utf16_bin,
+ my_strnxfrm_unicode,
+ my_strnxfrmlen_simple,
+ my_like_range_utf16,
+ my_wildcmp_utf16_bin,
+ my_strcasecmp_mb2_or_mb4,
+ my_instr_mb,
+ my_hash_sort_utf16_bin,
+ my_propagate_simple
+};
+
+
+MY_CHARSET_HANDLER my_charset_utf16_handler=
+{
+ NULL, /* init */
+ my_ismbchar_utf16, /* ismbchar */
+ my_mbcharlen_utf16, /* mbcharlen */
+ my_numchars_utf16,
+ my_charpos_utf16,
+ my_well_formed_len_utf16,
+ my_lengthsp_mb2,
+ my_numcells_mb,
+ my_utf16_uni, /* mb_wc */
+ my_uni_utf16, /* wc_mb */
+ my_mb_ctype_mb,
+ my_caseup_str_mb2_or_mb4,
+ my_casedn_str_mb2_or_mb4,
+ my_caseup_utf16,
+ my_casedn_utf16,
+ my_snprintf_mb2,
+ my_l10tostr_mb2_or_mb4,
+ my_ll10tostr_mb2_or_mb4,
+ my_fill_mb2,
+ my_strntol_mb2_or_mb4,
+ my_strntoul_mb2_or_mb4,
+ my_strntoll_mb2_or_mb4,
+ my_strntoull_mb2_or_mb4,
+ my_strntod_mb2_or_mb4,
+ my_strtoll10_mb2,
+ my_strntoull10rnd_mb2_or_mb4,
+ my_scan_mb2
+};
+
+
+CHARSET_INFO my_charset_utf16_general_ci=
+{
+ 54,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
+ "utf16", /* cs name */
+ "utf16_general_ci", /* name */
+ "UTF-16 Unicode", /* comment */
+ NULL, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 1, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 0, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler,
+ &my_collation_utf16_general_ci_handler
+};
+
+
+/*
+  Compiled-in descriptor of the "utf16_bin" collation of the "utf16"
+  character set (collation number 55, flagged MY_CS_BINSORT).
+  Characters take between 2 (mbminlen) and 4 (mbmaxlen) bytes.
+  Shares my_charset_utf16_handler with the general_ci collation and uses
+  my_collation_utf16_bin_handler for comparisons.
+*/
+CHARSET_INFO my_charset_utf16_bin=
+{
+ 55,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
+ "utf16", /* cs name */
+ "utf16_bin", /* name */
+ "UTF-16 Unicode", /* comment */
+ NULL, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 1, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 0, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler,
+ &my_collation_utf16_bin_handler
+};
+
+#endif /* HAVE_CHARSET_utf16 */
+
+
+#ifdef HAVE_CHARSET_utf32
+
+/**
+  Decode one big-endian UTF-32 code unit.
+
+  @param cs   Character set (unused).
+  @param pwc  Receives the decoded value.
+  @param s    Start of the input.
+  @param e    End of the input (one past the last byte).
+
+  @return 4 on success, MY_CS_TOOSMALL4 if fewer than 4 bytes are available.
+*/
+static int
+my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
+             my_wc_t *pwc, const uchar *s, const uchar *e)
+{
+  if (s + 4 > e)
+    return MY_CS_TOOSMALL4;
+  /*
+    Widen every byte before shifting: "s[0] << 24" would be evaluated in
+    (signed) int, overflowing for s[0] >= 0x80 and sign-extending into
+    a wider my_wc_t.
+  */
+  *pwc= ((my_wc_t) s[0] << 24) +
+        ((my_wc_t) s[1] << 16) +
+        ((my_wc_t) s[2] << 8) +
+        (my_wc_t) s[3];
+  return 4;
+}
+
+
+/**
+  Encode one code point as a big-endian UTF-32 code unit.
+
+  @param cs  Character set (unused).
+  @param wc  Value to store.
+  @param s   Output position.
+  @param e   End of the output buffer.
+
+  @return 4 on success, MY_CS_TOOSMALL4 if fewer than 4 bytes are available.
+*/
+static int
+my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
+             my_wc_t wc, uchar *s, uchar *e)
+{
+  if (e < s + 4)
+    return MY_CS_TOOSMALL4;
+
+  /* Most significant byte first */
+  s[0]= (uchar) (wc >> 24);
+  s[1]= (uchar) ((wc >> 16) & 0xFF);
+  s[2]= (uchar) ((wc >> 8) & 0xFF);
+  s[3]= (uchar) (wc & 0xFF);
+  return 4;
+}
+
+
+/**
+  Replace *wc with its lower-case mapping, if the case table has one.
+
+  @param uni_plane  Case table, indexed by the high byte of the code point
+                    (pages 0..255 are consulted; entries may be NULL).
+  @param wc         In/out code point; left unchanged when its page is
+                    outside the table or has no data.
+*/
+static inline void
+my_tolower_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
+{
+  /*
+    Keep the page number unsigned: stored in an int, a very large *wc could
+    yield a negative page that passes the "< 256" test and indexes the
+    table out of bounds.
+  */
+  size_t page= (size_t) (*wc >> 8);
+  if (page < 256 && uni_plane[page])
+    *wc= uni_plane[page][*wc & 0xFF].tolower;
+}
+
+
+/**
+  Replace *wc with its upper-case mapping, if the case table has one.
+
+  @param uni_plane  Case table, indexed by the high byte of the code point
+                    (pages 0..255 are consulted; entries may be NULL).
+  @param wc         In/out code point; left unchanged when its page is
+                    outside the table or has no data.
+*/
+static inline void
+my_toupper_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
+{
+  /*
+    Keep the page number unsigned: stored in an int, a very large *wc could
+    yield a negative page that passes the "< 256" test and indexes the
+    table out of bounds.
+  */
+  size_t page= (size_t) (*wc >> 8);
+  if (page < 256 && uni_plane[page])
+    *wc= uni_plane[page][*wc & 0xFF].toupper;
+}
+
+
+/**
+  Replace *wc with its sort weight.
+
+  Code points whose page (value >> 8) is 256 or more are mapped to
+  REPLACEMENT_CHAR; code points on a page without data are left unchanged.
+
+  @param uni_plane  Case/sort table, indexed by page (0..255).
+  @param wc         In/out code point.
+*/
+static inline void
+my_tosort_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
+{
+  /*
+    Keep the page number unsigned: stored in an int, a very large *wc could
+    yield a negative page that passes the "< 256" test and indexes the
+    table out of bounds.
+  */
+  size_t page= (size_t) (*wc >> 8);
+  if (page >= 256)
+  {
+    *wc= REPLACEMENT_CHAR;
+    return;
+  }
+  if (uni_plane[page])
+    *wc= uni_plane[page][*wc & 0xFF].sort;
+}
+
+
+/**
+  Convert a UTF-32 string to upper case in place.
+
+  @param cs      Character set; cs->caseinfo supplies the case table.
+  @param src     String to convert (modified in place).
+  @param srclen  Length of src in bytes.
+  @param dst     Must be equal to src.
+  @param dstlen  Must be equal to srclen.
+
+  @return srclen.
+*/
+static size_t
+my_caseup_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
+                char *dst __attribute__((unused)),
+                size_t dstlen __attribute__((unused)))
+{
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  uchar *pos= (uchar*) src;
+  uchar *limit= (uchar*) src + srclen;
+  my_wc_t wc;
+  int nbytes;
+  DBUG_ASSERT(src == dst && srclen == dstlen);
+
+  for ( ; pos < limit; pos+= nbytes)
+  {
+    /* Stop on a truncated trailing character */
+    if ((nbytes= my_utf32_uni(cs, &wc, pos, limit)) <= 0)
+      break;
+    my_toupper_utf32(uni_plane, &wc);
+    if (my_uni_utf32(cs, wc, pos, limit) != nbytes)
+      break;
+  }
+  return srclen;
+}
+
+
+/**
+  Mix one value into the running hash state.
+
+  @param n1  First hash accumulator (updated).
+  @param n2  Second hash accumulator (incremented by 3).
+  @param ch  Value to mix in.
+*/
+static inline void
+my_hash_add(ulong *n1, ulong *n2, uint ch)
+{
+  ulong cur= *n1;
+  *n1= cur ^ ((((cur & 63) + *n2) * (ch)) + (cur << 8));
+  *n2+= 3;
+}
+
+
+/**
+  Hash a UTF-32 string using sort weights, ignoring trailing spaces.
+
+  @param cs    Character set; cs->caseinfo supplies the sort table.
+  @param s     String to hash.
+  @param slen  Length of s in bytes.
+  @param n1    First hash accumulator (in/out).
+  @param n2    Second hash accumulator (in/out).
+*/
+static void
+my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
+                   ulong *n1, ulong *n2)
+{
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  const uchar *end= s + slen;
+  my_wc_t wc;
+  int nbytes;
+
+  /* Drop trailing U+0020 characters (00 00 00 20) */
+  while (end > s + 3 && end[-1] == ' ' && !end[-2] && !end[-3] && !end[-4])
+    end-= 4;
+
+  for ( ; (nbytes= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) end)) > 0;
+       s+= nbytes)
+  {
+    my_tosort_utf32(uni_plane, &wc);
+    /* Feed the weight to the hash one byte at a time, high byte first */
+    my_hash_add(n1, n2, (uint) (wc >> 24));
+    my_hash_add(n1, n2, ((uint) (wc >> 16)) & 0xFF);
+    my_hash_add(n1, n2, ((uint) (wc >> 8)) & 0xFF);
+    my_hash_add(n1, n2, (uint) (wc & 0xFF));
+  }
+}
+
+
+/**
+  Convert a UTF-32 string to lower case in place.
+
+  @param cs      Character set; cs->caseinfo supplies the case table.
+  @param src     String to convert (modified in place).
+  @param srclen  Length of src in bytes.
+  @param dst     Must be equal to src.
+  @param dstlen  Must be equal to srclen.
+
+  @return srclen.
+*/
+static size_t
+my_casedn_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
+                char *dst __attribute__((unused)),
+                size_t dstlen __attribute__((unused)))
+{
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  uchar *pos= (uchar*) src;
+  uchar *limit= (uchar*) src + srclen;
+  my_wc_t wc;
+  int nbytes;
+  DBUG_ASSERT(src == dst && srclen == dstlen);
+
+  for (;;)
+  {
+    /* The decoder reports a non-positive value once the input runs out */
+    nbytes= my_utf32_uni(cs, &wc, pos, limit);
+    if (nbytes <= 0)
+      break;
+    my_tolower_utf32(uni_plane, &wc);
+    if (my_uni_utf32(cs, wc, pos, limit) != nbytes)
+      break;
+    pos+= nbytes;
+  }
+  return srclen;
+}
+
+
+/**
+  Compare two UTF-32 strings by sort weight.
+
+  @param cs           Character set; cs->caseinfo supplies the sort table.
+  @param s            First string.
+  @param slen         Length of s in bytes.
+  @param t            Second string.
+  @param tlen         Length of t in bytes.
+  @param t_is_prefix  If set, only the characters of t are significant:
+                      the result is 0 when all of t has been matched.
+
+  @return Negative, zero or positive, as s sorts before, equal to or after t.
+*/
+static int
+my_strnncoll_utf32(CHARSET_INFO *cs,
+                   const uchar *s, size_t slen,
+                   const uchar *t, size_t tlen,
+                   my_bool t_is_prefix)
+{
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  const uchar *s_end= s + slen;
+  const uchar *t_end= t + tlen;
+  my_wc_t s_wc, t_wc;
+  LINT_INIT(s_wc);
+  LINT_INIT(t_wc);
+
+  while (s < s_end && t < t_end)
+  {
+    int s_len= my_utf32_uni(cs, &s_wc, s, s_end);
+    int t_len= my_utf32_uni(cs, &t_wc, t, t_end);
+
+    /* Incorrect string, compare by char value */
+    if (s_len <= 0 || t_len <= 0)
+      return my_bincmp(s, s_end, t, t_end);
+
+    my_tosort_utf32(uni_plane, &s_wc);
+    my_tosort_utf32(uni_plane, &t_wc);
+
+    if (s_wc != t_wc)
+      return s_wc > t_wc ? 1 : -1;
+
+    s+= s_len;
+    t+= t_len;
+  }
+
+  if (t_is_prefix)
+    return (int) (t - t_end);
+  return (int) ((s_end - s) - (t_end - t));
+}
+
+
+/**
+ Compare strings, discarding end space
+
+ If one string is shorter than the other, then we space-extend the shorter
+ one so that the strings have equal length.
+
+ This will ensure that the following things hold:
+
+ "a" == "a "
+ "a\0" < "a"
+ "a\0" < "a "
+
+ @param cs Character set pointer.
+ @param a First string to compare.
+ @param a_length Length of 'a'.
+ @param b Second string to compare.
+ @param b_length Length of 'b'.
+
+ IMPLEMENTATION
+
+ @return Comparison result.
+ @retval Negative number, if a less than b.
+ @retval 0, if a is equal to b
+ @retval Positive number, if a > b
+*/
+
+
+static int
+my_strnncollsp_utf32(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool diff_if_only_endspace_difference)
+{
+ int res;
+ my_wc_t s_wc, t_wc;
+ const uchar *se= s + slen, *te= t + tlen;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ LINT_INIT(s_wc);
+ LINT_INIT(t_wc);
+
+ DBUG_ASSERT((slen % 4) == 0);
+ DBUG_ASSERT((tlen % 4) == 0);
+
+ /* Unless the build enables it, the flag is forced off */
+#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
+ diff_if_only_endspace_difference= FALSE;
+#endif
+
+ /* Compare the common part character by character, by sort weight */
+ while ( s < se && t < te )
+ {
+ int s_res= my_utf32_uni(cs, &s_wc, s, se);
+ int t_res= my_utf32_uni(cs, &t_wc, t, te);
+
+ if ( s_res <= 0 || t_res <= 0 )
+ {
+ /* Incorrect string, compare bytewise */
+ return my_bincmp(s, se, t, te);
+ }
+
+ my_tosort_utf32(uni_plane, &s_wc);
+ my_tosort_utf32(uni_plane, &t_wc);
+
+ if ( s_wc != t_wc )
+ {
+ return s_wc > t_wc ? 1 : -1;
+ }
+
+ s+= s_res;
+ t+= t_res;
+ }
+
+ /* From here on slen/tlen hold the number of unread bytes of each string */
+ slen= (size_t) (se - s);
+ tlen= (size_t) (te - t);
+ res= 0;
+
+ if (slen != tlen)
+ {
+ int s_res, swap= 1;
+ if (diff_if_only_endspace_difference)
+ res= 1; /* Assume 's' is bigger */
+ if (slen < tlen)
+ {
+ /* 't' is the longer string: scan its tail instead and invert the sign */
+ slen= tlen;
+ s= t;
+ se= te;
+ swap= -1;
+ res= -res;
+ }
+
+ /*
+ Compare the tail of the longer string against spaces: the first
+ non-space character decides the result.
+ */
+ for ( ; s < se; s+= s_res)
+ {
+ if ((s_res= my_utf32_uni(cs, &s_wc, s, se)) < 0)
+ {
+ DBUG_ASSERT(0);
+ return 0;
+ }
+ if (s_wc != ' ')
+ return (s_wc < ' ') ? -swap : swap;
+ }
+ }
+ return res;
+}
+
+
+/**
+  Size of the sort key buffer for a UTF-32 string.
+
+  @param cs   Character set (unused).
+  @param len  Length of the source string in bytes.
+
+  @return Half of len.
+*/
+static size_t
+my_strnxfrmlen_utf32(CHARSET_INFO *cs __attribute__((unused)), size_t len)
+{
+  size_t key_len= len / 2;
+  return key_len;
+}
+
+
+/**
+  Multi-byte character test for UTF-32.
+
+  The arguments are ignored.
+
+  @return Always 4.
+*/
+static uint
+my_ismbchar_utf32(CHARSET_INFO *cs __attribute__((unused)),
+                  const char *b __attribute__((unused)),
+                  const char *e __attribute__((unused)))
+{
+  const uint utf32_char_bytes= 4;
+  return utf32_char_bytes;
+}
+
+
+/**
+  Character length for UTF-32.
+
+  The arguments are ignored.
+
+  @return Always 4.
+*/
+static uint
+my_mbcharlen_utf32(CHARSET_INFO *cs __attribute__((unused)) ,
+                   uint c __attribute__((unused)))
+{
+  const uint utf32_char_bytes= 4;
+  return utf32_char_bytes;
+}
+
+
+/**
+  Minimal vsnprintf() producing big-endian UTF-32 output.
+
+  Supported conversions: %s, %d, %u (an optional width/precision and an
+  'l' modifier are skipped). Any other conversion is emitted as '%'.
+  Every output character is written as three zero bytes followed by the
+  (single-byte) character.
+
+  @param dst  Output buffer.
+  @param n    Size of dst in bytes; expected to be a multiple of 4.
+  @param fmt  Format string (single-byte characters).
+  @param ap   Arguments.
+
+  @return Number of bytes written, not counting the 4-byte terminator.
+          0 if the buffer cannot even hold the terminator.
+*/
+static int
+my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap)
+{
+  char *start= dst, *end= dst + n;
+  char *limit;          /* Payload must end here: 4 bytes kept for the terminator */
+  DBUG_ASSERT((n % 4) == 0);
+
+  if (n < 4)
+    return 0;           /* No room even for the terminator */
+  limit= end - 4;
+
+  for (; *fmt ; fmt++)
+  {
+    if (fmt[0] != '%')
+    {
+      if ((size_t) (limit - dst) < 4)       /* End of buffer */
+        break;
+
+      *dst++= '\0';
+      *dst++= '\0';
+      *dst++= '\0';
+      *dst++= *fmt;     /* Copy ordinary char */
+      continue;
+    }
+
+    fmt++;
+
+    /* Skip if max size is used (to be compatible with printf) */
+    while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-')
+      fmt++;
+
+    if (*fmt == 'l')
+      fmt++;
+
+    if (*fmt == 's')                        /* String parameter */
+    {
+      char *par= va_arg(ap, char *);
+      size_t plen;
+      size_t left_len= (size_t) (limit - dst);
+      if (!par) par= (char*)"(null)";
+      plen= strlen(par);
+      /* Truncate to what fits in front of the terminator */
+      if (left_len < plen*4)
+        plen= left_len / 4;
+
+      for ( ; plen ; plen--, dst+= 4, par++)
+      {
+        dst[0]= '\0';
+        dst[1]= '\0';
+        dst[2]= '\0';
+        dst[3]= par[0];
+      }
+      continue;
+    }
+    else if (*fmt == 'd' || *fmt == 'u')    /* Integer parameter */
+    {
+      int iarg;
+      char nbuf[16];
+      char *pbuf= nbuf;
+
+      if ((size_t) (end - dst) < 64)
+        break;
+      iarg= va_arg(ap, int);
+      if (*fmt == 'd')
+        int10_to_str((long) iarg, nbuf, -10);
+      else
+        int10_to_str((long) (uint) iarg,nbuf,10);
+
+      for (; pbuf[0]; pbuf++)
+      {
+        *dst++= '\0';
+        *dst++= '\0';
+        *dst++= '\0';
+        *dst++= *pbuf;
+      }
+      continue;
+    }
+
+    /* We come here on '%%', unknown code or too long parameter */
+    if ((size_t) (limit - dst) < 4)
+      break;
+    *dst++= '\0';
+    *dst++= '\0';
+    *dst++= '\0';
+    *dst++= '%';        /* % used as % or unknown code */
+
+    /* Format ended right after '%': do not let the loop step past the NUL */
+    if (!*fmt)
+      break;
+  }
+
+  DBUG_ASSERT(dst <= limit);
+  *dst++= '\0';
+  *dst++= '\0';
+  *dst++= '\0';
+  *dst++= '\0';         /* End of errmessage */
+  return (int) (dst - start - 4);
+}
+
+
+/**
+  snprintf() producing UTF-32 output; see my_vsnprintf_utf32().
+
+  @param cs   Character set (unused).
+  @param to   Output buffer.
+  @param n    Size of the output buffer in bytes.
+  @param fmt  Format string.
+
+  @return The value returned by my_vsnprintf_utf32().
+*/
+static size_t
+my_snprintf_utf32(CHARSET_INFO *cs __attribute__((unused)),
+                  char* to, size_t n, const char* fmt, ...)
+{
+  size_t result;
+  va_list args;
+  va_start(args,fmt);
+  result= my_vsnprintf_utf32(to, n, fmt, args);
+  va_end(args);         /* Every va_start() must be paired with va_end() */
+  return result;
+}
+
+
+/**
+  Convert a decimal number stored as UTF-32 text to a longlong.
+
+  Digits are accumulated in three parts (i, j, k) so that most of the work
+  is done in unsigned long arithmetic; the parts are combined at the end.
+
+  @param cs      Character set (unused).
+  @param nptr    Start of the string.
+  @param endptr  In: points to the end of the string. NULL-terminated input
+                 (endptr == NULL) is not supported.
+                 Out: points to the first unparsed byte.
+  @param error   Out: 0 for a non-negative number, -1 for a negative one,
+                 MY_ERRNO_ERANGE on overflow, MY_ERRNO_EDOM if there was
+                 no number to convert.
+
+  @return The converted value; 0 if nothing was converted; on overflow
+          LONGLONG_MIN (negative input) or ULONGLONG_MAX cast to longlong.
+*/
+static longlong
+my_strtoll10_utf32(CHARSET_INFO *cs __attribute__((unused)),
+                   const char *nptr, char **endptr, int *error)
+{
+  const char *s, *end, *start, *n_end, *true_end;
+  uchar c;
+  unsigned long i, j, k;
+  ulonglong li;
+  int negative;
+  ulong cutoff, cutoff2, cutoff3;
+
+  s= nptr;
+  /* If fixed length string */
+  if (endptr)
+  {
+    /* Make sure string length is a multiple of 4 */
+    end= s + ((*endptr - s) / 4) * 4;
+    /* Skip leading spaces and tabs */
+    while (s < end && !s[0] && !s[1] && !s[2] &&
+           (s[3] == ' ' || s[3] == '\t'))
+      s+= 4;
+    if (s == end)
+      goto no_conv;
+  }
+  else
+  {
+    /* We don't support null terminated strings in UTF-32 */
+    goto no_conv;
+  }
+
+  /* Check for a sign. */
+  negative= 0;
+  if (!s[0] && !s[1] && !s[2] && s[3] == '-')
+  {
+    *error= -1;                                 /* Mark as negative number */
+    negative= 1;
+    s+= 4;
+    if (s == end)
+      goto no_conv;
+    cutoff=  MAX_NEGATIVE_NUMBER / LFACTOR2;
+    cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
+    cutoff3=  MAX_NEGATIVE_NUMBER % 100;
+  }
+  else
+  {
+    *error= 0;
+    if (!s[0] && !s[1] && !s[2] && s[3] == '+')
+    {
+      s+= 4;
+      if (s == end)
+        goto no_conv;
+    }
+    cutoff=  ULONGLONG_MAX / LFACTOR2;
+    cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
+    cutoff3= ULONGLONG_MAX % 100;
+  }
+
+  /* Handle case where we have a lot of pre-zero */
+  if (!s[0] && !s[1] && !s[2] && s[3] == '0')
+  {
+    i= 0;
+    do
+    {
+      s+= 4;
+      if (s == end)
+        goto end_i;                             /* Return 0 */
+    }
+    while (!s[0] && !s[1] && !s[2] && s[3] == '0');
+    n_end= s + 4 * INIT_CNT;
+  }
+  else
+  {
+    /* Read first digit to check that it's a valid number */
+    if (s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
+      goto no_conv;
+    i= c;
+    s+= 4;
+    n_end= s + 4 * (INIT_CNT-1);
+  }
+
+  /* Handle first 9 digits and store them in i */
+  if (n_end > end)
+    n_end= end;
+  for (; s != n_end ; s+= 4)
+  {
+    if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
+      goto end_i;
+    i= i * 10 + c;
+  }
+  if (s == end)
+    goto end_i;
+
+  /* Handle next 9 digits and store them in j */
+  j= 0;
+  start= s;                             /* Used to know how much to shift i */
+  n_end= true_end= s + 4 * INIT_CNT;
+  if (n_end > end)
+    n_end= end;
+  do
+  {
+    if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
+      goto end_i_and_j;
+    j= j * 10 + c;
+    s+= 4;
+  } while (s != n_end);
+  if (s == end)
+  {
+    if (s != true_end)
+      goto end_i_and_j;
+    goto end3;
+  }
+  if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
+    goto end3;
+
+  /* Handle the next 1 or 2 digits and store them in k */
+  k=c;
+  s+= 4;
+  if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
+    goto end4;
+  k= k * 10 + c;
+  s+= 4;                                /* One UTF-32 character is 4 bytes */
+  *endptr= (char*) s;
+
+  /* number string should have ended here */
+  if (s != end && !s[0] && !s[1] && !s[2] && (c= (s[3] - '0')) <= 9)
+    goto overflow;
+
+  /*
+    Check that we didn't get an overflow with the last digit.
+    (i, j, k) is compared lexicographically against the cutoffs.
+  */
+  if (i > cutoff || (i == cutoff && (j > cutoff2 ||
+                                     (j == cutoff2 && k > cutoff3))))
+    goto overflow;
+  li= i * LFACTOR2+ (ulonglong) j * 100 + k;
+  return (longlong) li;
+
+overflow:                                       /* *endptr is set here */
+  *error= MY_ERRNO_ERANGE;
+  return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
+
+end_i:
+  *endptr= (char*) s;
+  return (negative ? ((longlong) -(long) i) : (longlong) i);
+
+end_i_and_j:
+  li= (ulonglong) i * lfactor[(size_t) (s-start) / 4] + j;
+  *endptr= (char*) s;
+  return (negative ? -((longlong) li) : (longlong) li);
+
+end3:
+  li= (ulonglong) i*LFACTOR+ (ulonglong) j;
+  *endptr= (char*) s;
+  return (negative ? -((longlong) li) : (longlong) li);
+
+end4:
+  li= (ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
+  *endptr= (char*) s;
+  if (negative)
+  {
+    if (li > MAX_NEGATIVE_NUMBER)
+      goto overflow;
+    return -((longlong) li);
+  }
+  return (longlong) li;
+
+no_conv:
+  /* There was no number to convert. */
+  *error= MY_ERRNO_EDOM;
+  if (endptr)                           /* May be NULL: see the check above */
+    *endptr= (char *) nptr;
+  return 0;
+}
+
+
+/**
+  Number of UTF-32 characters in the byte range [b, e).
+
+  @param cs  Character set (unused).
+  @param b   Start of the string.
+  @param e   End of the string.
+
+  @return Byte length divided by 4 (rounded down).
+*/
+static size_t
+my_numchars_utf32(CHARSET_INFO *cs __attribute__((unused)),
+                  const char *b, const char *e)
+{
+  size_t nbytes= (size_t) (e - b);
+  return nbytes / 4;
+}
+
+
+/**
+  Byte offset of the character with the given index.
+
+  @param cs   Character set (unused).
+  @param b    Start of the string.
+  @param e    End of the string.
+  @param pos  Character index.
+
+  @return pos * 4, or (string length + 4) if that offset lies beyond the
+          end of the string.
+*/
+static size_t
+my_charpos_utf32(CHARSET_INFO *cs __attribute__((unused)),
+                 const char *b, const char *e, size_t pos)
+{
+  size_t string_length= (size_t) (e - b);
+  size_t offset= pos * 4;
+
+  if (offset > string_length)
+    return string_length + 4;
+  return offset;
+}
+
+
+/**
+  Length of the leading well-formed part of a UTF-32 string.
+
+  @param cs      Character set (unused).
+  @param b       Start of the string.
+  @param e       End of the string; (e - b) must be divisible by 4.
+  @param nchars  Maximum number of characters to examine.
+  @param error   Out: 1 if a character above U+10FFFF was found, else 0.
+
+  @return Number of bytes, from b, that are well formed (at most
+          nchars characters).
+*/
+static size_t
+my_well_formed_len_utf32(CHARSET_INFO *cs __attribute__((unused)),
+                         const char *b, const char *e,
+                         size_t nchars, int *error)
+{
+  const char *pos;
+  size_t length= e - b;
+  size_t max_bytes= nchars * 4;
+  DBUG_ASSERT((length % 4) == 0);
+  *error= 0;
+
+  /* Look at no more than nchars characters */
+  if (length > max_bytes)
+  {
+    length= max_bytes;
+    e= b + max_bytes;
+  }
+
+  for (pos= b; pos < e; pos+= 4)
+  {
+    /* Don't accept characters greater than U+10FFFF */
+    if (pos[0] || (uchar) pos[1] > 0x10)
+    {
+      *error= 1;
+      return pos - b;
+    }
+  }
+  return length;
+}
+
+
+/**
+  Fill a buffer with repetitions of one UTF-32 character.
+
+  @param cs    Character set; its wc_mb handler encodes the fill character.
+  @param s     Buffer to fill.
+  @param slen  Size of the buffer in bytes; must be divisible by 4.
+  @param fill  Code point to fill with.
+*/
+static
+void my_fill_utf32(CHARSET_INFO *cs,
+                   char *s, size_t slen, int fill)
+{
+  char encoded[10];
+  uint encoded_len;
+  char *stop= s + slen;
+
+  DBUG_ASSERT((slen % 4) == 0);
+
+  /* Encode the fill character once, then replicate it */
+  encoded_len= cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) encoded,
+                               (uchar*) encoded + sizeof(encoded));
+  DBUG_ASSERT(encoded_len == 4);
+
+  for ( ; s < stop; s+= 4)
+    memcpy(s, encoded, 4);
+}
+
+
+/**
+  Length of a UTF-32 string without its trailing spaces.
+
+  @param cs      Character set (unused).
+  @param ptr     Start of the string.
+  @param length  Length in bytes; must be divisible by 4.
+
+  @return Length in bytes after removing trailing U+0020 characters.
+*/
+static size_t
+my_lengthsp_utf32(CHARSET_INFO *cs __attribute__((unused)),
+                  const char *ptr, size_t length)
+{
+  size_t len= length;
+  DBUG_ASSERT((length % 4) == 0);
+
+  /* A space is encoded as 00 00 00 20 */
+  while (len >= 4 &&
+         ptr[len - 1] == ' ' && !ptr[len - 2] &&
+         !ptr[len - 3] && !ptr[len - 4])
+    len-= 4;
+  return len;
+}
+
+
+/**
+  Wildcard (LIKE) comparison for utf32_general_ci.
+
+  Delegates to my_wildcmp_unicode(), passing the case table of the
+  character set.
+
+  @return The result of my_wildcmp_unicode().
+*/
+static int
+my_wildcmp_utf32_ci(CHARSET_INFO *cs,
+                    const char *str, const char *str_end,
+                    const char *wildstr, const char *wildend,
+                    int escape, int w_one, int w_many)
+{
+  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
+                            escape, w_one, w_many, cs->caseinfo);
+}
+
+
+/**
+  Wildcard (LIKE) comparison for utf32_bin.
+
+  Delegates to my_wildcmp_unicode() without a case table (NULL).
+
+  @return The result of my_wildcmp_unicode().
+*/
+static int
+my_wildcmp_utf32_bin(CHARSET_INFO *cs,
+                     const char *str,const char *str_end,
+                     const char *wildstr,const char *wildend,
+                     int escape, int w_one, int w_many)
+{
+  int result= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
+                                 escape, w_one, w_many, NULL);
+  return result;
+}
+
+
+/**
+  Compare two UTF-32 strings by code point value.
+
+  @param cs           Character set.
+  @param s            First string.
+  @param slen         Length of s in bytes.
+  @param t            Second string.
+  @param tlen         Length of t in bytes.
+  @param t_is_prefix  If set, only the characters of t are significant:
+                      the result is 0 when all of t has been matched.
+
+  @return Negative, zero or positive, as s sorts before, equal to or after t.
+*/
+static int
+my_strnncoll_utf32_bin(CHARSET_INFO *cs,
+                       const uchar *s, size_t slen,
+                       const uchar *t, size_t tlen,
+                       my_bool t_is_prefix)
+{
+  const uchar *s_end= s + slen;
+  const uchar *t_end= t + tlen;
+  my_wc_t s_wc, t_wc;
+  LINT_INIT(s_wc);
+  LINT_INIT(t_wc);
+
+  while (s < s_end && t < t_end)
+  {
+    int s_len= my_utf32_uni(cs, &s_wc, s, s_end);
+    int t_len= my_utf32_uni(cs, &t_wc, t, t_end);
+
+    /* Incorrect string, compare by char value */
+    if (s_len <= 0 || t_len <= 0)
+      return my_bincmp(s, s_end, t, t_end);
+
+    if (s_wc != t_wc)
+      return s_wc > t_wc ? 1 : -1;
+
+    s+= s_len;
+    t+= t_len;
+  }
+
+  if (t_is_prefix)
+    return (int) (t - t_end);
+  return (int) ((s_end - s) - (t_end - t));
+}
+
+
+/**
+  Read one big-endian 32-bit value without bounds checking.
+
+  @param s  Points to at least 4 readable bytes.
+
+  @return The value formed by s[0..3], most significant byte first.
+*/
+static inline my_wc_t
+my_utf32_get(const uchar *s)
+{
+  my_wc_t b0= (my_wc_t) s[0] << 24;
+  my_wc_t b1= (my_wc_t) s[1] << 16;
+  my_wc_t b2= (my_wc_t) s[2] << 8;
+  return b0 + b1 + b2 + s[3];
+}
+
+
+/**
+  Compare two UTF-32 strings by code point value, ignoring trailing spaces.
+
+  The shorter string is treated as if it were padded with spaces.
+
+  @param cs    Character set (unused).
+  @param s     First string.
+  @param slen  Length of s in bytes; must be divisible by 4.
+  @param t     Second string.
+  @param tlen  Length of t in bytes; must be divisible by 4.
+  @param diff_if_only_endspace_difference  Unused.
+
+  @return -1, 0 or 1, as s sorts before, equal to or after t.
+*/
+static int
+my_strnncollsp_utf32_bin(CHARSET_INFO *cs __attribute__((unused)),
+                         const uchar *s, size_t slen,
+                         const uchar *t, size_t tlen,
+                         my_bool diff_if_only_endspace_difference
+                         __attribute__((unused)))
+{
+  const uchar *s_end, *t_end;
+  const uchar *tail, *tail_end;
+  size_t remaining;
+  int sign;
+
+  DBUG_ASSERT((slen % 4) == 0);
+  DBUG_ASSERT((tlen % 4) == 0);
+
+  s_end= s + slen;
+  t_end= t + tlen;
+
+  /* Compare the common part */
+  remaining= min(slen, tlen);
+  while (remaining)
+  {
+    my_wc_t s_wc= my_utf32_get(s);
+    my_wc_t t_wc= my_utf32_get(t);
+    if (s_wc != t_wc)
+      return s_wc > t_wc ? 1 : -1;
+
+    s+= 4;
+    t+= 4;
+    remaining-= 4;
+  }
+
+  if (slen == tlen)
+    return 0;
+
+  /* Compare the tail of the longer string against spaces */
+  if (slen < tlen)
+  {
+    tail= t;
+    tail_end= t_end;
+    sign= -1;
+  }
+  else
+  {
+    tail= s;
+    tail_end= s_end;
+    sign= 1;
+  }
+
+  for ( ; tail < tail_end ; tail+= 4)
+  {
+    my_wc_t wc= my_utf32_get(tail);
+    if (wc != ' ')
+      return (wc < ' ') ? -sign : sign;
+  }
+  return 0;
+}
+
+
+/**
+ Calculate min_str and max_str that ranges a LIKE string.
+
+ @param ptr Pointer to LIKE pattern.
+ @param ptr_length Length of LIKE pattern.
+ @param escape Escape character in LIKE. (Normally '\').
+ All escape characters should be removed
+ from min_str and max_str.
+ @param res_length Length of min_str and max_str.
+ @param min_str Smallest case sensitive string that ranges LIKE.
+ Should be space padded to res_length.
+ @param max_str Largest case sensitive string that ranges LIKE.
+ Normally padded with the biggest character sort value.
+
+ @return Optimization status.
+ @retval FALSE if LIKE pattern can be optimized
+ @retval TRUE if LIKE can't be optimized.
+*/
+
+my_bool
+my_like_range_utf32(CHARSET_INFO *cs,
+                    const char *ptr, size_t ptr_length,
+                    pbool escape, pbool w_one, pbool w_many,
+                    size_t res_length,
+                    char *min_str,char *max_str,
+                    size_t *min_length,size_t *max_length)
+{
+  const char *end= ptr + ptr_length;
+  char *min_org= min_str;
+  char *min_end= min_str + res_length;
+  char *max_end= max_str + res_length;
+  size_t charlen= res_length / cs->mbmaxlen;
+
+  DBUG_ASSERT((res_length % 4) == 0);
+
+  /*
+    NOTE(review): when the pattern is shorter than charlen characters the
+    decoder fails on the exhausted input and TRUE ("can't be optimized")
+    is returned - confirm that this is intended.
+  */
+  for ( ; charlen > 0; ptr+= 4, charlen--)
+  {
+    my_wc_t wc;
+    if (my_utf32_uni(cs, &wc, (const uchar*) ptr, (const uchar*) end) < 0)
+    {
+      my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
+      my_fill_utf32(cs, max_str, max_end - max_str, cs->max_sort_char);
+      /* min_length and max_length are not important */
+      return TRUE;
+    }
+
+    if (wc == (my_wc_t) escape)
+    {
+      ptr+= 4;                                  /* Skip escape */
+      if (my_utf32_uni(cs, &wc, (const uchar*) ptr, (const uchar*) end) < 0)
+      {
+        my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
+        /* Each buffer is measured against its own end pointer */
+        my_fill_utf32(cs, max_str, max_end - max_str, cs->max_sort_char);
+        /* min_length and max_length are not important */
+        return TRUE;
+      }
+      if (my_uni_utf32(cs, wc, (uchar*) min_str, (uchar*) min_end) != 4 ||
+          my_uni_utf32(cs, wc, (uchar*) max_str, (uchar*) max_end) != 4)
+        goto pad_set_lengths;
+      /* Step over the character just stored */
+      min_str+= 4;
+      max_str+= 4;
+      continue;
+    }
+
+    if (wc == (my_wc_t) w_one)
+    {
+      if (my_uni_utf32(cs, cs->min_sort_char,
+                       (uchar*) min_str, (uchar*) min_end) != 4 ||
+          my_uni_utf32(cs, cs->max_sort_char,
+                       (uchar*) max_str, (uchar*) max_end) != 4)
+        goto pad_set_lengths;
+      min_str+= 4;
+      max_str+= 4;
+      continue;
+    }
+
+    if (wc == (my_wc_t) w_many)
+    {
+      /*
+        Calculate length of keys:
+        'a\0\0... is the smallest possible string when we have space expand
+        a\ff\ff... is the biggest possible string
+      */
+      *min_length= ((cs->state & MY_CS_BINSORT) ?
+                    (size_t) (min_str - min_org) :
+                    res_length);
+      *max_length= res_length;
+      goto pad_min_max;
+    }
+
+    /* Normal character */
+    if (my_uni_utf32(cs, wc, (uchar*) min_str, (uchar*) min_end) != 4 ||
+        my_uni_utf32(cs, wc, (uchar*) max_str, (uchar*) max_end) != 4)
+      goto pad_set_lengths;
+    min_str+= 4;
+    max_str+= 4;
+  }
+
+pad_set_lengths:
+  *min_length= *max_length= (size_t) (min_str - min_org);
+
+pad_min_max:
+  /* Pad the rest of both keys with the smallest/biggest sort character */
+  my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
+  my_fill_utf32(cs, max_str, max_end - max_str, cs->max_sort_char);
+  return FALSE;
+}
+
+
+/**
+  Measure a leading run of characters of the given kind.
+
+  Only MY_SEQ_SPACES is supported: the run of leading U+0020 characters.
+
+  @param cs             Character set.
+  @param str            Start of the string.
+  @param end            End of the string.
+  @param sequence_type  Kind of sequence to scan for.
+
+  @return Length of the run in bytes; 0 for unsupported sequence types.
+*/
+static size_t
+my_scan_utf32(CHARSET_INFO *cs,
+              const char *str, const char *end, int sequence_type)
+{
+  const char *str0= str;
+
+  switch (sequence_type)
+  {
+  case MY_SEQ_SPACES:
+    for ( ; str < end; )
+    {
+      my_wc_t wc;
+      /* The decoder takes uchar pointers; cast explicitly */
+      int res= my_utf32_uni(cs, &wc, (const uchar*) str, (const uchar*) end);
+      if (res < 0 || wc != ' ')
+        break;
+      str+= res;
+    }
+    return (size_t) (str - str0);
+  default:
+    return 0;
+  }
+}
+
+
+/*
+  Collation handler table for utf32_general_ci.
+  Plugs the sort-weight based UTF-32 routines (compare, LIKE range,
+  wildcard compare, hash) into MY_COLLATION_HANDLER; the remaining slots
+  use shared helpers. The slot order is fixed by the declaration of
+  MY_COLLATION_HANDLER, which is not part of this file section.
+*/
+static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
+{
+ NULL, /* init */
+ my_strnncoll_utf32,
+ my_strnncollsp_utf32,
+ my_strnxfrm_unicode,
+ my_strnxfrmlen_utf32,
+ my_like_range_utf32,
+ my_wildcmp_utf32_ci,
+ my_strcasecmp_mb2_or_mb4,
+ my_instr_mb,
+ my_hash_sort_utf32,
+ my_propagate_simple
+};
+
+
+/*
+  Collation handler table for utf32_bin.
+  Differs from the general_ci table in the compare and wildcard slots,
+  which use the *_bin variants. The slot order is fixed by the declaration
+  of MY_COLLATION_HANDLER, which is not part of this file section.
+*/
+static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
+{
+ NULL, /* init */
+ my_strnncoll_utf32_bin,
+ my_strnncollsp_utf32_bin,
+ my_strnxfrm_unicode,
+ my_strnxfrmlen_utf32,
+ my_like_range_utf32,
+ my_wildcmp_utf32_bin,
+ my_strcasecmp_mb2_or_mb4,
+ my_instr_mb,
+ my_hash_sort_utf32,
+ my_propagate_simple
+};
+
+
+/*
+  Character set handler table for utf32, shared by all utf32 collations.
+  UTF-32 specific routines are used where the 4-byte encoding matters
+  (character lengths, encode/decode, case conversion, snprintf, fill,
+  strtoll10, scan); the other slots use the generic *_mb and
+  *_mb2_or_mb4 helpers. The slot order is fixed by the declaration of
+  MY_CHARSET_HANDLER, which is not part of this file section.
+*/
+MY_CHARSET_HANDLER my_charset_utf32_handler=
+{
+ NULL, /* init */
+ my_ismbchar_utf32,
+ my_mbcharlen_utf32,
+ my_numchars_utf32,
+ my_charpos_utf32,
+ my_well_formed_len_utf32,
+ my_lengthsp_utf32,
+ my_numcells_mb,
+ my_utf32_uni,
+ my_uni_utf32,
+ my_mb_ctype_mb,
+ my_caseup_str_mb2_or_mb4,
+ my_casedn_str_mb2_or_mb4,
+ my_caseup_utf32,
+ my_casedn_utf32,
+ my_snprintf_utf32,
+ my_l10tostr_mb2_or_mb4,
+ my_ll10tostr_mb2_or_mb4,
+ my_fill_utf32,
+ my_strntol_mb2_or_mb4,
+ my_strntoul_mb2_or_mb4,
+ my_strntoll_mb2_or_mb4,
+ my_strntoull_mb2_or_mb4,
+ my_strntod_mb2_or_mb4,
+ my_strtoll10_utf32,
+ my_strntoull10rnd_mb2_or_mb4,
+ my_scan_utf32
+};
+
+
+/*
+  Compiled-in descriptor of the "utf32_general_ci" collation of the "utf32"
+  character set (collation number 60, flagged MY_CS_PRIMARY).
+  Every character takes exactly 4 bytes (mbminlen == mbmaxlen == 4).
+  Case/sort data comes from my_unicase_default; the behaviour is supplied by
+  my_charset_utf32_handler and my_collation_utf32_general_ci_handler.
+*/
+CHARSET_INFO my_charset_utf32_general_ci=
+{
+ 60,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
+ "utf32", /* cs name */
+ "utf32_general_ci", /* name */
+ "UTF-32 Unicode", /* comment */
+ NULL, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 1, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 0, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_general_ci_handler
+};
+
+
+/*
+  Compiled-in descriptor of the "utf32_bin" collation of the "utf32"
+  character set (collation number 61, flagged MY_CS_BINSORT).
+  Every character takes exactly 4 bytes (mbminlen == mbmaxlen == 4).
+  Shares my_charset_utf32_handler with the general_ci collation and uses
+  my_collation_utf32_bin_handler for comparisons.
+*/
+CHARSET_INFO my_charset_utf32_bin=
+{
+ 61,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
+ "utf32", /* cs name */
+ "utf32_bin", /* name */
+ "UTF-32 Unicode", /* comment */
+ NULL, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 1, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 0, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_bin_handler
+};
+
+
+#endif /* HAVE_CHARSET_utf32 */
+
+
+#ifdef HAVE_CHARSET_ucs2
+
+/*
+  Character class table for ucs2: one leading 0 entry followed by 256
+  per-byte entries (257 values in total). Only the first 128 byte values
+  carry non-zero class bits.
+  NOTE(review): the meaning of the individual bits is defined elsewhere
+  (presumably the usual ctype flag constants) - confirm before relying on it.
+*/
+static uchar ctype_ucs2[] = {
+ 0,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
+ 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
+ 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+  Byte-to-lower-case table for ucs2 (256 entries).
+  It is the identity mapping except for 65..90 ('A'..'Z'), which map to
+  97..122 ('a'..'z').
+*/
+static uchar to_lower_ucs2[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+};
+
+/*
+  Byte-to-upper-case table for ucs2 (256 entries).
+  It is the identity mapping except for 97..122 ('a'..'z'), which map to
+  65..90 ('A'..'Z').
+*/
+static uchar to_upper_ucs2[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+};
+
+
+/*
+  Decode one big-endian UCS-2 character.
+
+  Returns 2 on success, or MY_CS_TOOSMALL2 if fewer than 2 bytes are
+  available between s and e.
+*/
+static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
+                       my_wc_t * pwc, const uchar *s, const uchar *e)
+{
+  if (e < s + 2)                        /* Need 2 characters */
+    return MY_CS_TOOSMALL2;
+
+  /* High byte first */
+  *pwc= (s[0] << 8) + s[1];
+  return 2;
+}
+
+/*
+  Encode one character as big-endian UCS-2.
+
+  Returns 2 on success, or MY_CS_TOOSMALL2 if fewer than 2 bytes are
+  available between r and e. Only the low 16 bits of wc are stored.
+*/
+static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
+                       my_wc_t wc, uchar *r, uchar *e)
+{
+  if (e < r + 2)
+    return MY_CS_TOOSMALL2;
+
+  r[1]= (uchar) (wc & 0xFF);            /* Low byte */
+  r[0]= (uchar) (wc >> 8);              /* High byte */
+  return 2;
+}
+
+
+/*
+  Convert a UCS-2 string to upper case in place.
+
+  cs->caseinfo supplies the case table; dst/dstlen must be equal to
+  src/srclen. Returns srclen.
+*/
+static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
+                             char *dst __attribute__((unused)),
+                             size_t dstlen __attribute__((unused)))
+{
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  uchar *pos= (uchar*) src;
+  uchar *limit= (uchar*) src + srclen;
+  my_wc_t wc;
+  int nbytes;
+  DBUG_ASSERT(src == dst && srclen == dstlen);
+
+  for ( ; pos < limit; pos+= nbytes)
+  {
+    int plane;
+    if ((nbytes= my_ucs2_uni(cs, &wc, pos, limit)) <= 0)
+      break;
+    /* Characters on a plane without case data are kept unchanged */
+    plane= (wc>>8) & 0xFF;
+    if (uni_plane[plane])
+      wc= uni_plane[plane][wc & 0xFF].toupper;
+    if (my_uni_ucs2(cs, wc, pos, limit) != nbytes)
+      break;
+  }
+  return srclen;
+}
+
+
+/*
+  Hash a UCS-2 string using sort weights, ignoring trailing spaces.
+
+  cs->caseinfo supplies the sort table; n1 and n2 are the running hash
+  accumulators and are updated in place.
+*/
+static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
+                              ulong *n1, ulong *n2)
+{
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  const uchar *end= s + slen;
+  my_wc_t wc;
+  int nbytes;
+
+  /* Drop trailing U+0020 characters (00 20) */
+  while (end > s+1 && end[-1] == ' ' && end[-2] == '\0')
+    end-= 2;
+
+  for ( ; s < end; s+= nbytes)
+  {
+    int plane;
+    if ((nbytes= my_ucs2_uni(cs, &wc, (uchar *) s, (uchar*) end)) <= 0)
+      break;
+    plane= (wc>>8) & 0xFF;
+    if (uni_plane[plane])
+      wc= uni_plane[plane][wc & 0xFF].sort;
+
+    /* Mix in the low byte of the weight first, then the high part */
+    n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8);
+    n2[0]+= 3;
+    n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
+    n2[0]+= 3;
+  }
+}
+
+
+/*
+  Convert a UCS-2 string to lower case in place.
+
+  cs->caseinfo supplies the case table; dst/dstlen must be equal to
+  src/srclen. Returns srclen.
+*/
+static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
+                             char *dst __attribute__((unused)),
+                             size_t dstlen __attribute__((unused)))
+{
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  uchar *pos= (uchar*) src;
+  uchar *limit= (uchar*) src + srclen;
+  my_wc_t wc;
+  int nbytes;
+  DBUG_ASSERT(src == dst && srclen == dstlen);
+
+  for ( ; pos < limit; pos+= nbytes)
+  {
+    int plane;
+    if ((nbytes= my_ucs2_uni(cs, &wc, pos, limit)) <= 0)
+      break;
+    /* Characters on a plane without case data are kept unchanged */
+    plane= (wc>>8) & 0xFF;
+    if (uni_plane[plane])
+      wc= uni_plane[plane][wc & 0xFF].tolower;
+    if (my_uni_ucs2(cs, wc, pos, limit) != nbytes)
+      break;
+  }
+  return srclen;
+}
+
+
+/*
+  Compare two UCS-2 strings by sort weight.
+
+  If t_is_prefix is set, only the characters of t are significant: the
+  result is 0 when all of t has been matched.
+  Returns a negative, zero or positive value, as s sorts before, equal to
+  or after t.
+*/
+static int my_strnncoll_ucs2(CHARSET_INFO *cs,
+                             const uchar *s, size_t slen,
+                             const uchar *t, size_t tlen,
+                             my_bool t_is_prefix)
+{
+  int s_res,t_res;
+  my_wc_t UNINIT_VAR(s_wc),t_wc;
+  const uchar *s_end= s + slen;
+  const uchar *t_end= t + tlen;
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+
+  while (s < s_end && t < t_end)
+  {
+    int plane;
+    s_res= my_ucs2_uni(cs, &s_wc, s, s_end);
+    t_res= my_ucs2_uni(cs, &t_wc, t, t_end);
+
+    /* Incorrect string, compare by char value */
+    if (s_res <= 0 || t_res <= 0)
+      return ((int)s[0]-(int)t[0]);
+
+    /* Map both characters to their sort weights, where available */
+    plane= (s_wc>>8) & 0xFF;
+    if (uni_plane[plane])
+      s_wc= uni_plane[plane][s_wc & 0xFF].sort;
+    plane= (t_wc>>8) & 0xFF;
+    if (uni_plane[plane])
+      t_wc= uni_plane[plane][t_wc & 0xFF].sort;
+
+    if (s_wc != t_wc)
+      return s_wc > t_wc ? 1 : -1;
+
+    s+= s_res;
+    t+= t_res;
+  }
+
+  if (t_is_prefix)
+    return (int) (t - t_end);
+  return (int) ((s_end - s) - (t_end - t));
+}
+
+/*
+ Compare strings, discarding end space
+
+ SYNOPSIS
+ my_strnncollsp_ucs2()
+ cs character set handler
+ a First string to compare
+ a_length Length of 'a'
+ b Second string to compare
+ b_length Length of 'b'
+
+ IMPLEMENTATION
+ If one string is shorter than the other, then we space-extend the shorter
+ one so that the strings have equal length.
+
+ This will ensure that the following things hold:
+
+ "a" == "a "
+ "a\0" < "a"
+ "a\0" < "a "
+
+ RETURN
+ < 0 a < b
+ = 0 a == b
+ > 0 a > b
+*/
+
+/*
+  'cs' is used below (cs->caseinfo), so it must not be marked unused.
+  Returns -1, 0 or 1 as s sorts before, equal to or after t, with the
+  shorter string treated as if it were padded with spaces.
+*/
+static int my_strnncollsp_ucs2(CHARSET_INFO *cs,
+                               const uchar *s, size_t slen,
+                               const uchar *t, size_t tlen,
+                               my_bool diff_if_only_endspace_difference
+                               __attribute__((unused)))
+{
+  const uchar *se, *te;
+  size_t minlen;
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+
+  /* extra safety to make sure the lengths are even numbers */
+  slen&= ~(size_t) 1;
+  tlen&= ~(size_t) 1;
+
+  se= s + slen;
+  te= t + tlen;
+
+  /* Compare the common part by sort weight (or raw value if no weights) */
+  for (minlen= min(slen, tlen); minlen; minlen-= 2)
+  {
+    int s_wc = uni_plane[s[0]] ? (int) uni_plane[s[0]][s[1]].sort :
+                                 (((int) s[0]) << 8) + (int) s[1];
+
+    int t_wc = uni_plane[t[0]] ? (int) uni_plane[t[0]][t[1]].sort :
+                                 (((int) t[0]) << 8) + (int) t[1];
+    if ( s_wc != t_wc )
+      return s_wc > t_wc ? 1 : -1;
+
+    s+= 2;
+    t+= 2;
+  }
+
+  if (slen != tlen)
+  {
+    int swap= 1;
+    if (slen < tlen)
+    {
+      /* 't' is the longer string: scan its tail and invert the sign */
+      s= t;
+      se= te;
+      swap= -1;
+    }
+
+    /* The first non-space character in the tail decides the result */
+    for ( ; s < se ; s+= 2)
+    {
+      if (s[0] || s[1] != ' ')
+        return (s[0] == 0 && s[1] < ' ') ? -swap : swap;
+    }
+  }
+  return 0;
+}
+
+
+/*
+  Multi-byte character test for UCS-2.
+  The arguments are ignored; the result is always 2.
+*/
+static uint my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+                             const char *b __attribute__((unused)),
+                             const char *e __attribute__((unused)))
+{
+  const uint ucs2_char_bytes= 2;
+  return ucs2_char_bytes;
+}
+
+
+/*
+  Character length for UCS-2.
+  The arguments are ignored; the result is always 2.
+*/
+static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
+                              uint c __attribute__((unused)))
+{
+  const uint ucs2_char_bytes= 2;
+  return ucs2_char_bytes;
+}
+
+
static
size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)),
const char *b, const char *e)
@@ -1336,25 +3168,6 @@ size_t my_well_formed_len_ucs2(CHARSET_INFO *cs __attribute__((unused)),
static
-void my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- char *s, size_t l, int fill)
-{
- for ( ; l >= 2; s[0]= 0, s[1]= fill, s+=2, l-=2);
-}
-
-
-static
-size_t my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- const char *ptr, size_t length)
-{
- const char *end= ptr+length;
- while (end > ptr+1 && end[-1] == ' ' && end[-2] == '\0')
- end-=2;
- return (size_t) (end-ptr);
-}
-
-
-static
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
@@ -1457,29 +3270,6 @@ static int my_strnncollsp_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
static
-int my_strcasecmp_ucs2_bin(CHARSET_INFO *cs, const char *s, const char *t)
-{
- size_t s_len= strlen(s);
- size_t t_len= strlen(t);
- size_t len = (s_len > t_len) ? s_len : t_len;
- return my_strncasecmp_ucs2(cs, s, t, len);
-}
-
-
-static
-size_t my_strnxfrm_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
- uchar *dst, size_t dstlen,
- const uchar *src, size_t srclen)
-{
- if (dst != src)
- memcpy(dst,src,srclen= min(dstlen,srclen));
- if (dstlen > srclen)
- cs->cset->fill(cs, (char*) dst + srclen, dstlen - srclen, ' ');
- return dstlen;
-}
-
-
-static
void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,ulong *nr1, ulong *nr2)
{
@@ -1613,38 +3403,16 @@ fill_max_and_min:
-size_t my_scan_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- const char *str, const char *end, int sequence_type)
-{
- const char *str0= str;
- end--; /* for easier loop condition, because of two bytes per character */
-
- switch (sequence_type)
- {
- case MY_SEQ_SPACES:
- for ( ; str < end; str+= 2)
- {
- if (str[0] != '\0' || str[1] != ' ')
- break;
- }
- return (size_t) (str - str0);
- default:
- return 0;
- }
-}
-
-
-
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
{
NULL, /* init */
my_strnncoll_ucs2,
my_strnncollsp_ucs2,
- my_strnxfrm_ucs2,
+ my_strnxfrm_unicode,
my_strnxfrmlen_simple,
my_like_range_ucs2,
my_wildcmp_ucs2_ci,
- my_strcasecmp_ucs2,
+ my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_ucs2,
my_propagate_simple
@@ -1656,11 +3424,11 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
NULL, /* init */
my_strnncoll_ucs2_bin,
my_strnncollsp_ucs2_bin,
- my_strnxfrm_ucs2_bin,
+ my_strnxfrm_unicode,
my_strnxfrmlen_simple,
my_like_range_ucs2,
my_wildcmp_ucs2_bin,
- my_strcasecmp_ucs2_bin,
+ my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_ucs2_bin,
my_propagate_simple
@@ -1675,27 +3443,27 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
my_numchars_ucs2,
my_charpos_ucs2,
my_well_formed_len_ucs2,
- my_lengthsp_ucs2,
+ my_lengthsp_mb2,
my_numcells_mb,
my_ucs2_uni, /* mb_wc */
my_uni_ucs2, /* wc_mb */
my_mb_ctype_mb,
- my_caseup_str_ucs2,
- my_casedn_str_ucs2,
+ my_caseup_str_mb2_or_mb4,
+ my_casedn_str_mb2_or_mb4,
my_caseup_ucs2,
my_casedn_ucs2,
- my_snprintf_ucs2,
- my_l10tostr_ucs2,
- my_ll10tostr_ucs2,
- my_fill_ucs2,
- my_strntol_ucs2,
- my_strntoul_ucs2,
- my_strntoll_ucs2,
- my_strntoull_ucs2,
- my_strntod_ucs2,
- my_strtoll10_ucs2,
- my_strntoull10rnd_ucs2,
- my_scan_ucs2
+ my_snprintf_mb2,
+ my_l10tostr_mb2_or_mb4,
+ my_ll10tostr_mb2_or_mb4,
+ my_fill_mb2,
+ my_strntol_mb2_or_mb4,
+ my_strntoul_mb2_or_mb4,
+ my_strntoll_mb2_or_mb4,
+ my_strntoull_mb2_or_mb4,
+ my_strntod_mb2_or_mb4,
+ my_strtoll10_mb2,
+ my_strntoull10rnd_mb2_or_mb4,
+ my_scan_mb2
};
@@ -1764,4 +3532,4 @@ CHARSET_INFO my_charset_ucs2_bin=
};
-#endif
+#endif /* HAVE_CHARSET_ucs2 */
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 91f633e45ce..7de5cdd00ee 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -27,6 +27,16 @@
#define EILSEQ ENOENT
#endif
+
+#define MY_UTF8MB3_GENERAL_CI MY_UTF8MB3 "_general_ci"
+#define MY_UTF8MB3_GENERAL_CS MY_UTF8MB3 "_general_cs"
+#define MY_UTF8MB3_BIN MY_UTF8MB3 "_bin"
+#define MY_UTF8MB4_GENERAL_CI MY_UTF8MB4 "_general_ci"
+#define MY_UTF8MB4_GENERAL_CS MY_UTF8MB4 "_general_cs"
+#define MY_UTF8MB4_BIN MY_UTF8MB4 "_bin"
+
+
+
#ifndef HAVE_CHARSET_utf8
#define HAVE_CHARSET_utf8
#endif
@@ -39,6 +49,14 @@
#define HAVE_UNIDATA
#endif
+#ifdef HAVE_CHARSET_utf16
+#define HAVE_UNIDATA
+#endif
+
+#ifdef HAVE_CHARSET_utf32
+#define HAVE_UNIDATA
+#endif
+
#ifdef HAVE_UNIDATA
#include "my_uctype.h"
@@ -1702,6 +1720,24 @@ MY_UNICASE_INFO *my_unicase_turkish[256]=
};
+/*
+  Code point substituted for characters that have no page in the
+  weight tables. Deliberately defined without a trailing semicolon,
+  so that the macro can be used inside any expression.
+*/
+#define REPLACEMENT_CHAR 0xFFFD
+
+
+/*
+  Replace a code point with its sort weight.
+
+  uni_plane  Array of 256 per-page weight tables, indexed by the
+             high part (wc >> 8) of the code point. An entry may be
+             NULL, in which case the code point is left unchanged.
+             The array itself must not be NULL.
+  wc         In: code point. Out: its weight.
+
+  Code points whose page number is 256 or more are not covered by
+  the tables and are all mapped to REPLACEMENT_CHAR.
+*/
+static inline void
+my_tosort_unicode(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
+{
+  /*
+    Keep the page number in the character type: narrowing it to "int"
+    before the range check could turn a huge value into a small or
+    negative table index.
+  */
+  my_wc_t page= *wc >> 8;
+  if (page < 256)
+  {
+    if (uni_plane[page])
+      *wc= uni_plane[page][*wc & 0xFF].sort;
+  }
+  else
+  {
+    *wc= REPLACEMENT_CHAR;
+  }
+}
+
/*
** Compare string against string with wildcard
@@ -1712,13 +1748,14 @@ MY_UNICASE_INFO *my_unicase_turkish[256]=
** 1 if matched with wildcard
*/
-int my_wildcmp_unicode(CHARSET_INFO *cs,
- const char *str,const char *str_end,
- const char *wildstr,const char *wildend,
- int escape, int w_one, int w_many,
- MY_UNICASE_INFO **weights)
+int
+my_wildcmp_unicode(CHARSET_INFO *cs,
+ const char *str,const char *str_end,
+ const char *wildstr,const char *wildend,
+ int escape, int w_one, int w_many,
+ MY_UNICASE_INFO **weights)
{
- int result= -1; /* Not found, using wildcards */
+ int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc;
int scan, plane;
int (*mb_wc)(struct charset_info_st *, my_wc_t *,
@@ -1734,14 +1771,14 @@ int my_wildcmp_unicode(CHARSET_INFO *cs,
(const uchar*)wildend)) <= 0)
return 1;
- if (w_wc == (my_wc_t)w_many)
+ if (w_wc == (my_wc_t) w_many)
{
- result= 1; /* Found an anchor char */
+ result= 1; /* Found an anchor char */
break;
}
wildstr+= scan;
- if (w_wc == (my_wc_t)escape && wildstr < wildend)
+ if (w_wc == (my_wc_t) escape && wildstr < wildend)
{
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
@@ -1755,29 +1792,27 @@ int my_wildcmp_unicode(CHARSET_INFO *cs,
return 1;
str+= scan;
- if (!escaped && w_wc == (my_wc_t)w_one)
+ if (!escaped && w_wc == (my_wc_t) w_one)
{
- result= 1; /* Found an anchor char */
+ result= 1; /* Found an anchor char */
}
else
{
if (weights)
{
- plane=(s_wc>>8) & 0xFF;
- s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
- plane=(w_wc>>8) & 0xFF;
- w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
+ my_tosort_unicode(weights, &s_wc);
+ my_tosort_unicode(weights, &w_wc);
}
if (s_wc != w_wc)
- return 1; /* No match */
+ return 1; /* No match */
}
if (wildstr == wildend)
- return (str != str_end); /* Match if both are at end */
+ return (str != str_end); /* Match if both are at end */
}
- if (w_wc == (my_wc_t)w_many)
- { /* Found w_many */
+ if (w_wc == (my_wc_t) w_many)
+ { /* Found w_many */
/* Remove any '%' and '_' from the wild search string */
for ( ; wildstr != wildend ; )
@@ -1786,29 +1821,29 @@ int my_wildcmp_unicode(CHARSET_INFO *cs,
(const uchar*)wildend)) <= 0)
return 1;
- if (w_wc == (my_wc_t)w_many)
- {
- wildstr+= scan;
- continue;
- }
-
- if (w_wc == (my_wc_t)w_one)
- {
- wildstr+= scan;
+ if (w_wc == (my_wc_t)w_many)
+ {
+ wildstr+= scan;
+ continue;
+ }
+
+ if (w_wc == (my_wc_t)w_one)
+ {
+ wildstr+= scan;
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <=0)
return 1;
str+= scan;
- continue;
- }
- break; /* Not a wild character */
+ continue;
+ }
+ break; /* Not a wild character */
}
if (wildstr == wildend)
- return 0; /* Ok if w_many is last */
+ return 0; /* Ok if w_many is last */
if (str == str_end)
- return -1;
+ return -1;
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <=0)
@@ -1836,10 +1871,8 @@ int my_wildcmp_unicode(CHARSET_INFO *cs,
return 1;
if (weights)
{
- plane=(s_wc>>8) & 0xFF;
- s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
- plane=(w_wc>>8) & 0xFF;
- w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
+ my_tosort_unicode(weights, &s_wc);
+ my_tosort_unicode(weights, &w_wc);
}
if (s_wc == w_wc)
@@ -1861,8 +1894,53 @@ int my_wildcmp_unicode(CHARSET_INFO *cs,
return (str != str_end ? 1 : 0);
}
-#endif
+/*
+  Build a sort key: every source character becomes two bytes
+  (high byte first) holding its weight, and the rest of the
+  destination is padded with the key of the space character.
+
+  Shared between utf8mb3/utf8mb4/ucs2/utf16/utf32.
+
+  For collations flagged MY_CS_BINSORT no weight table is applied and
+  the decoded code point itself is stored. Only the low 16 bits of
+  the value are written.
+
+  Always returns dstlen.
+*/
+size_t
+my_strnxfrm_unicode(CHARSET_INFO *cs,
+                    uchar *dst, size_t dstlen,
+                    const uchar *src, size_t srclen)
+{
+  my_wc_t code;
+  int nbytes;
+  uchar *dst_end= dst + dstlen;
+  uchar *last_pair= dst_end - 1;       /* last position with room for 2 bytes */
+  const uchar *src_end= src + srclen;
+  MY_UNICASE_INFO **weights= NULL;
+  LINT_INIT(code);
+  DBUG_ASSERT(src);
+
+  if (!(cs->state & MY_CS_BINSORT))
+    weights= cs->caseinfo;
+
+  for ( ; dst < last_pair; src+= nbytes)
+  {
+    nbytes= cs->cset->mb_wc(cs, &code, src, src_end);
+    if (nbytes <= 0)
+      break;                           /* end of input or bad sequence */
+
+    if (weights)
+      my_tosort_unicode(weights, &code);
+
+    *dst++= (uchar) (code >> 8);
+    if (dst < dst_end)
+      *dst++= (uchar) (code & 0xFF);
+  }
+
+  /* Pad the remainder with the two-byte key of U+0020 */
+  for ( ; dst < last_pair; dst+= 2)
+  {
+    dst[0]= 0x00;
+    dst[1]= 0x20;
+  }
+
+  /* An odd "dstlen" leaves one spare byte: zero it */
+  if (dst < dst_end)
+    *dst= 0x00;
+
+  return dstlen;
+}
+
+
+#endif /* HAVE_UNIDATA */
#ifdef HAVE_CHARSET_utf8
@@ -2569,44 +2647,6 @@ size_t my_strnxfrmlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
}
-static size_t my_strnxfrm_utf8(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen,
- const uchar *src, size_t srclen)
-{
- my_wc_t wc;
- int res;
- int plane;
- uchar *de= dst + dstlen;
- uchar *de_beg= de - 1;
- const uchar *se = src + srclen;
- MY_UNICASE_INFO **uni_plane= cs->caseinfo;
-
- while (dst < de_beg)
- {
- if ((res=my_utf8_uni(cs,&wc, src, se)) <= 0)
- break;
- src+=res;
-
- plane=(wc>>8) & 0xFF;
- wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
-
- *dst++= (uchar)(wc >> 8);
- *dst++= (uchar)(wc & 0xFF);
-
- }
-
- while (dst < de_beg) /* Fill the tail with keys for space character */
- {
- *dst++= 0x00;
- *dst++= 0x20;
- }
-
- if (dst < de) /* Clear the last byte, if "dstlen" was an odd number */
- *dst= 0x00;
-
- return dstlen;
-}
-
static uint my_ismbchar_utf8(CHARSET_INFO *cs,const char *b, const char *e)
{
my_wc_t wc;
@@ -2642,7 +2682,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
NULL, /* init */
my_strnncoll_utf8,
my_strnncollsp_utf8,
- my_strnxfrm_utf8,
+ my_strnxfrm_unicode,
my_strnxfrmlen_utf8,
my_like_range_mb,
my_wildcmp_utf8,
@@ -2891,7 +2931,7 @@ static MY_COLLATION_HANDLER my_collation_cs_handler =
NULL, /* init */
my_strnncoll_utf8_cs,
my_strnncollsp_utf8_cs,
- my_strnxfrm_utf8,
+ my_strnxfrm_unicode,
my_strnxfrmlen_utf8,
my_like_range_simple,
my_wildcmp_mb,
@@ -4154,7 +4194,7 @@ static MY_COLLATION_HANDLER my_collation_filename_handler =
NULL, /* init */
my_strnncoll_utf8,
my_strnncollsp_utf8,
- my_strnxfrm_utf8,
+ my_strnxfrm_unicode,
my_strnxfrmlen_utf8,
my_like_range_mb,
my_wildcmp_utf8,
@@ -4284,3 +4324,859 @@ int main()
+#ifdef HAVE_CHARSET_utf8mb4
+
+/*
+ Character type table for utf8mb4, indexed by byte value plus one
+ (the table starts with one extra leading entry).
+
+ We consider bytes with a code greater than 127 to be letters.
+ This guarantees that word boundaries work fine with regular
+ expressions. Note, there is no need to mark byte 255 as a
+ letter: it is an illegal byte in UTF8.
+*/
+static uchar ctype_utf8mb4[]=
+{
+ 0,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
+ 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
+ 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0
+};
+
+
+/*
+ Single-byte lower-case map for utf8mb4, indexed by byte value.
+ Bytes 65..90 ('A'..'Z') map to 97..122 ('a'..'z');
+ every other byte, including all bytes above 127, maps to itself.
+*/
+static uchar to_lower_utf8mb4[]=
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+};
+
+
+/*
+ Single-byte upper-case map for utf8mb4, indexed by byte value.
+ Bytes 97..122 ('a'..'z') map to 65..90 ('A'..'Z');
+ every other byte, including all bytes above 127, maps to itself.
+*/
+static uchar to_upper_utf8mb4[]=
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+};
+
+
+/*
+  Byte-wise comparison of the ranges [s, se) and [t, te).
+  The common prefix is compared with memcmp(); if it is equal,
+  the difference of the two lengths decides the result.
+*/
+static inline int
+bincmp_utf8mb4(const uchar *s, const uchar *se,
+               const uchar *t, const uchar *te)
+{
+  int s_bytes= (int) (se - s);
+  int t_bytes= (int) (te - t);
+  int common= min(s_bytes, t_bytes);
+  int diff= memcmp(s, t, common);
+
+  if (diff)
+    return diff;
+  return s_bytes - t_bytes;
+}
+
+
+/*
+ Decode one UTF-8 sequence of 1 to 4 bytes starting at "s".
+
+ pwc Out: the decoded code point (written only on success).
+ s Start of the sequence.
+ e End of the input buffer (one past the last readable byte).
+
+ Returns the number of bytes consumed (1..4) on success,
+ MY_CS_TOOSMALL / MY_CS_TOOSMALL2 / MY_CS_TOOSMALL3 / MY_CS_TOOSMALL4
+ when the buffer ends before the sequence is complete, and
+ MY_CS_ILSEQ for an invalid lead byte, an invalid continuation byte,
+ an overlong 3- or 4-byte form, or a value above U+10FFFF.
+*/
+static int
+my_mb_wc_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t * pwc, const uchar *s, const uchar *e)
+{
+ uchar c;
+
+ if (s >= e)
+ return MY_CS_TOOSMALL;
+
+ c= s[0];
+ if (c < 0x80)
+ {
+ *pwc= c;
+ return 1;
+ }
+ else if (c < 0xc2) /* Continuation byte or overlong 2-byte lead (C0, C1) */
+ return MY_CS_ILSEQ;
+ else if (c < 0xe0)
+ {
+ if (s + 2 > e) /* We need 2 bytes */
+ return MY_CS_TOOSMALL2;
+
+ if (!((s[1] ^ 0x80) < 0x40)) /* Second byte must be 80..BF */
+ return MY_CS_ILSEQ;
+
+ *pwc= ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80);
+ return 2;
+ }
+ else if (c < 0xf0)
+ {
+ if (s + 3 > e) /* We need 3 bytes */
+ return MY_CS_TOOSMALL3;
+
+ /* Both tails must be 80..BF; after E0 the second byte must be >= A0 */
+ if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 &&
+ (c >= 0xe1 || s[1] >= 0xa0)))
+ return MY_CS_ILSEQ;
+
+ *pwc= ((my_wc_t) (c & 0x0f) << 12) |
+ ((my_wc_t) (s[1] ^ 0x80) << 6) |
+ (my_wc_t) (s[2] ^ 0x80);
+ return 3;
+ }
+ else if (c < 0xf5)
+ {
+ if (s + 4 > e) /* We need 4 bytes */
+ return MY_CS_TOOSMALL4;
+
+ /*
+ UTF-8 quick four-byte mask:
+ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ Encoding allows to encode U+00010000..U+001FFFFF
+
+ The maximum character defined in the Unicode standard is U+0010FFFF.
+ Higher characters U+00110000..U+001FFFFF are not used.
+
+ 11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min)
+ 11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)
+
+ Valid codes:
+ [F0][90..BF][80..BF][80..BF]
+ [F1][80..BF][80..BF][80..BF]
+ [F2][80..BF][80..BF][80..BF]
+ [F3][80..BF][80..BF][80..BF]
+ [F4][80..8F][80..BF][80..BF]
+ */
+
+ if (!((s[1] ^ 0x80) < 0x40 &&
+ (s[2] ^ 0x80) < 0x40 &&
+ (s[3] ^ 0x80) < 0x40 &&
+ (c >= 0xf1 || s[1] >= 0x90) &&
+ (c <= 0xf3 || s[1] <= 0x8F)))
+ return MY_CS_ILSEQ;
+ *pwc = ((my_wc_t) (c & 0x07) << 18) |
+ ((my_wc_t) (s[1] ^ 0x80) << 12) |
+ ((my_wc_t) (s[2] ^ 0x80) << 6) |
+ (my_wc_t) (s[3] ^ 0x80);
+ return 4;
+ }
+ return MY_CS_ILSEQ; /* Lead bytes F5..FF are never valid */
+}
+
+
+/*
+ Decode one UTF-8 sequence of 1 to 4 bytes starting at "s", without
+ any end-of-buffer check: there is no "e" parameter. Intended, for
+ example, for null-terminated strings.
+
+ Returns the number of bytes consumed (1..4) and stores the code
+ point in *pwc, or returns MY_CS_ILSEQ for an invalid sequence.
+*/
+static int
+my_mb_wc_utf8mb4_no_range(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t *pwc, const uchar *s)
+{
+ uchar c;
+
+ c= s[0];
+ if (c < 0x80)
+ {
+ *pwc = c;
+ return 1;
+ }
+
+ if (c < 0xc2) /* Continuation byte or overlong 2-byte lead (C0, C1) */
+ return MY_CS_ILSEQ;
+
+ if (c < 0xe0)
+ {
+ if (!((s[1] ^ 0x80) < 0x40)) /* Second byte must be 80..BF */
+ return MY_CS_ILSEQ;
+
+ *pwc = ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80);
+ return 2;
+ }
+
+ if (c < 0xf0)
+ {
+ /* Both tails must be 80..BF; after E0 the second byte must be >= A0 */
+ if (!((s[1] ^ 0x80) < 0x40 &&
+ (s[2] ^ 0x80) < 0x40 &&
+ (c >= 0xe1 || s[1] >= 0xa0)))
+ return MY_CS_ILSEQ;
+ *pwc= ((my_wc_t) (c & 0x0f) << 12) |
+ ((my_wc_t) (s[1] ^ 0x80) << 6) |
+ (my_wc_t) (s[2] ^ 0x80);
+
+ return 3;
+ }
+ else if (c < 0xf5)
+ {
+ /* F0 needs a second byte >= 90; F4 needs a second byte <= 8F */
+ if (!((s[1] ^ 0x80) < 0x40 &&
+ (s[2] ^ 0x80) < 0x40 &&
+ (s[3] ^ 0x80) < 0x40 &&
+ (c >= 0xf1 || s[1] >= 0x90) &&
+ (c <= 0xf3 || s[1] <= 0x8F)))
+ return MY_CS_ILSEQ;
+ *pwc = ((my_wc_t) (c & 0x07) << 18) |
+ ((my_wc_t) (s[1] ^ 0x80) << 12) |
+ ((my_wc_t) (s[2] ^ 0x80) << 6) |
+ (my_wc_t) (s[3] ^ 0x80);
+ return 4;
+ }
+ return MY_CS_ILSEQ; /* Lead bytes F5..FF are never valid */
+}
+
+
+/*
+ Encode the code point "wc" as UTF-8 into the buffer [r, e).
+
+ Returns the number of bytes written (1..4), MY_CS_TOOSMALL or
+ MY_CS_TOOSMALLN(count) if the buffer is too short, and MY_CS_ILUNI
+ if wc is 0x200000 or greater.
+
+ NOTE(review): values 0x110000..0x1FFFFF are encoded here although
+ my_mb_wc_utf8mb4() refuses to decode them - confirm this is intended.
+*/
+static int
+my_wc_mb_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t wc, uchar *r, uchar *e)
+{
+ int count;
+
+ if (r >= e)
+ return MY_CS_TOOSMALL;
+
+ if (wc < 0x80)
+ count= 1;
+ else if (wc < 0x800)
+ count= 2;
+ else if (wc < 0x10000)
+ count= 3;
+ else if (wc < 0x200000)
+ count= 4;
+ else return MY_CS_ILUNI;
+
+ if (r + count > e)
+ return MY_CS_TOOSMALLN(count);
+
+ /*
+ Bytes are produced from the last one to the first. Every step emits
+ the low 6 bits as a continuation byte, shifts them out and ORs in a
+ marker bit which, after the remaining shifts, ends up in the lead
+ byte as its 110xxxxx / 1110xxxx / 11110xxx length prefix.
+ */
+ switch (count) {
+ /* Fall through all cases!!! */
+ case 4: r[3] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x10000;
+ case 3: r[2] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x800;
+ case 2: r[1] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0xc0;
+ case 1: r[0] = (uchar) wc;
+ }
+ return count;
+}
+
+
+/*
+ Encode the code point "wc" as UTF-8 at "r", without any check of the
+ space available: there is no end-of-buffer parameter, so the caller
+ must provide room for up to 4 bytes.
+
+ Returns the number of bytes written (1..4), or MY_CS_ILUNI if wc is
+ 0x200000 or greater.
+*/
+static int
+my_wc_mb_utf8mb4_no_range(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t wc, uchar *r)
+{
+ int count;
+
+ if (wc < 0x80)
+ count= 1;
+ else if (wc < 0x800)
+ count= 2;
+ else if (wc < 0x10000)
+ count= 3;
+ else if (wc < 0x200000)
+ count= 4;
+ else
+ return MY_CS_ILUNI;
+
+ /*
+ Bytes are produced from the last one to the first. Every step emits
+ the low 6 bits as a continuation byte, shifts them out and ORs in a
+ marker bit which, after the remaining shifts, ends up in the lead
+ byte as its length prefix.
+ */
+ switch (count)
+ {
+ /* Fall through all cases!!! */
+ case 4: r[3]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0x10000;
+ case 3: r[2]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0x800;
+ case 2: r[1]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0xc0;
+ case 1: r[0]= (uchar) wc;
+ }
+ return count;
+}
+
+
+/*
+  Convert a code point to lower case in place.
+
+  uni_plane  Array of 256 per-page case tables indexed by wc >> 8;
+             an entry may be NULL.
+  wc         In: code point. Out: its lower-case form.
+
+  Code points on a page without a table, or with a page number of
+  256 or more, are left unchanged.
+*/
+static inline void
+my_tolower_utf8mb4(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
+{
+  /*
+    Keep the page number in the character type: narrowing it to "int"
+    before the range check could yield a negative table index.
+  */
+  my_wc_t page= *wc >> 8;
+  if (page < 256 && uni_plane[page])
+    *wc= uni_plane[page][*wc & 0xFF].tolower;
+}
+
+
+/*
+  Convert a code point to upper case in place.
+
+  uni_plane  Array of 256 per-page case tables indexed by wc >> 8;
+             an entry may be NULL.
+  wc         In: code point. Out: its upper-case form.
+
+  Code points on a page without a table, or with a page number of
+  256 or more, are left unchanged.
+*/
+static inline void
+my_toupper_utf8mb4(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
+{
+  /*
+    Keep the page number in the character type: narrowing it to "int"
+    before the range check could yield a negative table index.
+  */
+  my_wc_t page= *wc >> 8;
+  if (page < 256 && uni_plane[page])
+    *wc= uni_plane[page][*wc & 0xFF].toupper;
+}
+
+
+/*
+  Upper-case a length-delimited utf8mb4 string.
+
+  Characters are decoded from [src, src + srclen), converted and
+  encoded into [dst, dst + dstlen). Conversion stops at the first
+  sequence that cannot be decoded or that does not fit into dst.
+
+  Returns the number of bytes written to dst.
+*/
+static size_t
+my_caseup_utf8mb4(CHARSET_INFO *cs, char *src, size_t srclen,
+                  char *dst, size_t dstlen)
+{
+  my_wc_t wc;
+  int nread, nwritten;
+  char *src_end= src + srclen;
+  char *dst_end= dst + dstlen;
+  char *dst_start= dst;
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  DBUG_ASSERT(src != dst || cs->caseup_multiply == 1);
+
+  while (src < src_end)
+  {
+    nread= my_mb_wc_utf8mb4(cs, &wc, (uchar *) src, (uchar*) src_end);
+    if (nread <= 0)
+      break;
+
+    my_toupper_utf8mb4(uni_plane, &wc);
+
+    nwritten= my_wc_mb_utf8mb4(cs, wc, (uchar*) dst, (uchar*) dst_end);
+    if (nwritten <= 0)
+      break;
+
+    src+= nread;
+    dst+= nwritten;
+  }
+  return (size_t) (dst - dst_start);
+}
+
+
+/*
+  Mix one value "ch" into the running hash state (*n1, *n2).
+*/
+static inline void
+my_hash_add(ulong *n1, ulong *n2, uint ch)
+{
+  ulong mixed= ((*n1 & 63) + *n2) * ch;
+
+  *n1^= mixed + (*n1 << 8);
+  *n2+= 3;
+}
+
+
+/*
+  Hash a utf8mb4 string by the sort weights of its characters.
+
+  Trailing spaces are ignored, so that 'A ' and 'A' hash identically.
+  Hashing stops at the first sequence that cannot be decoded.
+*/
+static void
+my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
+                     ulong *n1, ulong *n2)
+{
+  my_wc_t wc;
+  int nbytes;
+  const uchar *end= s + slen;
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+
+  /* Strip the end space */
+  for ( ; end > s && end[-1] == ' '; end--)
+  {}
+
+  for ( ; ; s+= nbytes)
+  {
+    nbytes= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) end);
+    if (nbytes <= 0)
+      break;
+
+    my_tosort_unicode(uni_plane, &wc);
+    my_hash_add(n1, n2, (uint) (wc & 0xFF));
+    my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF);
+
+    /*
+      The third byte is hashed only when it is non-zero. This keeps
+      the hash values of utf8mb3 and utf8mb4 identical for BMP
+      characters, which is useful to keep the order of records in
+      test results, e.g. for "SHOW GRANTS".
+    */
+    if (wc > 0xFFFF)
+      my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF);
+  }
+}
+
+
+/*
+  Upper-case a null-terminated utf8mb4 string in place.
+
+  Conversion stops at the first sequence that cannot be decoded or
+  encoded; the result is always terminated with '\0'.
+
+  Returns the length of the result in bytes.
+*/
+static size_t
+my_caseup_str_utf8mb4(CHARSET_INFO *cs, char *src)
+{
+  my_wc_t wc;
+  int nread, nwritten;
+  char *out= src;
+  char *out_start= src;
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  DBUG_ASSERT(cs->caseup_multiply == 1);
+
+  while (*src)
+  {
+    nread= my_mb_wc_utf8mb4_no_range(cs, &wc, (uchar *) src);
+    if (nread <= 0)
+      break;
+
+    my_toupper_utf8mb4(uni_plane, &wc);
+
+    nwritten= my_wc_mb_utf8mb4_no_range(cs, wc, (uchar*) out);
+    if (nwritten <= 0)
+      break;
+
+    src+= nread;
+    out+= nwritten;
+  }
+  *out= '\0';
+  return (size_t) (out - out_start);
+}
+
+
+/*
+  Lower-case a length-delimited utf8mb4 string.
+
+  Characters are decoded from [src, src + srclen), converted and
+  encoded into [dst, dst + dstlen). Conversion stops at the first
+  sequence that cannot be decoded or that does not fit into dst.
+
+  Returns the number of bytes written to dst.
+*/
+static size_t
+my_casedn_utf8mb4(CHARSET_INFO *cs,
+                  char *src, size_t srclen,
+                  char *dst, size_t dstlen)
+{
+  my_wc_t wc;
+  int nread, nwritten;
+  char *src_end= src + srclen;
+  char *dst_end= dst + dstlen;
+  char *dst_start= dst;
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  DBUG_ASSERT(src != dst || cs->casedn_multiply == 1);
+
+  while (src < src_end)
+  {
+    nread= my_mb_wc_utf8mb4(cs, &wc, (uchar*) src, (uchar*) src_end);
+    if (nread <= 0)
+      break;
+
+    my_tolower_utf8mb4(uni_plane, &wc);
+
+    nwritten= my_wc_mb_utf8mb4(cs, wc, (uchar*) dst, (uchar*) dst_end);
+    if (nwritten <= 0)
+      break;
+
+    src+= nread;
+    dst+= nwritten;
+  }
+  return (size_t) (dst - dst_start);
+}
+
+
+/*
+  Lower-case a null-terminated utf8mb4 string in place.
+
+  Conversion stops at the first sequence that cannot be decoded or
+  encoded; the result is always terminated with '\0'.
+
+  Returns the length of the result in bytes.
+*/
+static size_t
+my_casedn_str_utf8mb4(CHARSET_INFO *cs, char *src)
+{
+  my_wc_t wc;
+  int nread, nwritten;
+  char *out= src;
+  char *out_start= src;
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  DBUG_ASSERT(cs->casedn_multiply == 1);
+
+  while (*src)
+  {
+    nread= my_mb_wc_utf8mb4_no_range(cs, &wc, (uchar *) src);
+    if (nread <= 0)
+      break;
+
+    my_tolower_utf8mb4(uni_plane, &wc);
+
+    nwritten= my_wc_mb_utf8mb4_no_range(cs, wc, (uchar*) out);
+    if (nwritten <= 0)
+      break;
+
+    src+= nread;
+    out+= nwritten;
+  }
+
+  /*
+    The terminator has to be written explicitly, because in rare
+    cases the lower-case string is shorter than the original one:
+
+      "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE"
+      (0xC4B0 in utf8, i.e. two bytes)
+
+    becomes
+
+      "U+0069 LATIN SMALL LETTER I"
+      (0x69 in utf8, i.e. one byte)
+  */
+  *out= '\0';
+  return (size_t) (out - out_start);
+}
+
+
+/*
+  Compare two length-delimited utf8mb4 strings by the sort weights
+  of their characters.
+
+  If either string contains a sequence that cannot be decoded, the
+  remaining parts are compared byte-wise.
+
+  With "t_is_prefix" set, only the part of "s" covered by "t" is
+  taken into account once the common characters are exhausted.
+
+  Returns a negative number, 0 or a positive number.
+*/
+static int
+my_strnncoll_utf8mb4(CHARSET_INFO *cs,
+                     const uchar *s, size_t slen,
+                     const uchar *t, size_t tlen,
+                     my_bool t_is_prefix)
+{
+  my_wc_t s_wc,t_wc;
+  const uchar *se= s + slen;
+  const uchar *te= t + tlen;
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  LINT_INIT(s_wc);
+  LINT_INIT(t_wc);
+
+  for ( ; s < se && t < te; )
+  {
+    int s_len= my_mb_wc_utf8mb4(cs, &s_wc, s, se);
+    int t_len= my_mb_wc_utf8mb4(cs, &t_wc, t, te);
+
+    /* Incorrect string, compare bytewise */
+    if (s_len <= 0 || t_len <= 0)
+      return bincmp_utf8mb4(s, se, t, te);
+
+    my_tosort_unicode(uni_plane, &s_wc);
+    my_tosort_unicode(uni_plane, &t_wc);
+
+    if (s_wc > t_wc)
+      return 1;
+    if (s_wc != t_wc)
+      return -1;
+
+    s+= s_len;
+    t+= t_len;
+  }
+
+  if (t_is_prefix)
+    return (int) (t - te);
+  return (int) ((se - s) - (te - t));
+}
+
+
+/**
+
+ Compare strings, discarding end space
+
+ If one string is shorter than the other, then we space extend the
+ shorter one so that the strings have equal length.
+
+ This will ensure that the following things hold:
+
+ "a" == "a "
+ "a\0" < "a"
+ "a\0" < "a "
+
+ @param cs Character set pointer.
+ @param s First string to compare.
+ @param slen Length of 's'.
+ @param t Second string to compare.
+ @param tlen Length of 't'.
+ @param diff_if_only_endspace_difference
+ Set to 1 if the strings should be regarded as different
+ if they only differ in end space. Forced to FALSE unless
+ VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE is defined.
+
+ @return Comparison result.
+ @retval Negative number, if s is less than t.
+ @retval 0, if s is equal to t
+ @retval Positive number, if s > t
+*/
+
+static int
+my_strnncollsp_utf8mb4(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool diff_if_only_endspace_difference)
+{
+ int res;
+ my_wc_t s_wc, t_wc;
+ const uchar *se= s + slen, *te= t + tlen;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ LINT_INIT(s_wc);
+ LINT_INIT(t_wc);
+
+#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
+ diff_if_only_endspace_difference= FALSE;
+#endif
+
+ /* Compare character by character while both strings have input */
+ while ( s < se && t < te )
+ {
+ int s_res= my_mb_wc_utf8mb4(cs, &s_wc, s, se);
+ int t_res= my_mb_wc_utf8mb4(cs, &t_wc, t, te);
+
+ if ( s_res <= 0 || t_res <= 0 )
+ {
+ /* Incorrect string, compare bytewise */
+ return bincmp_utf8mb4(s, se, t, te);
+ }
+
+ my_tosort_unicode(uni_plane, &s_wc);
+ my_tosort_unicode(uni_plane, &t_wc);
+
+ if ( s_wc != t_wc )
+ {
+ return s_wc > t_wc ? 1 : -1;
+ }
+
+ s+=s_res;
+ t+=t_res;
+ }
+
+ /* From here on slen and tlen hold the lengths of the unread tails */
+ slen= (size_t) (se-s);
+ tlen= (size_t) (te-t);
+ res= 0;
+
+ if (slen != tlen)
+ {
+ int swap= 1;
+ if (diff_if_only_endspace_difference)
+ res= 1; /* Assume 's' is bigger */
+ if (slen < tlen)
+ {
+ /* 't' has the longer tail: scan it instead and invert the result */
+ slen= tlen;
+ s= t;
+ se= te;
+ swap= -1;
+ res= -res;
+ }
+ /*
+ This following loop uses the fact that in UTF-8
+ all multibyte characters are greater than space,
+ and all multibyte head characters are greater than
+ space. It means if we meet a character greater
+ than space, it always means that the longer string
+ is greater. So we can reuse the same loop from the
+ 8bit version, without having to process full multibyte
+ sequences.
+ */
+ for ( ; s < se; s++)
+ {
+ if (*s != ' ')
+ return (*s < ' ') ? -swap : swap;
+ }
+ }
+ return res;
+}
+
+
+/**
+  Compare 0-terminated UTF8 strings, ignoring letter case.
+
+  Both strings are decoded character by character and compared by
+  their lower-case forms. If an ill-formed multi-byte sequence is met
+  in either string, the rest of the two strings, starting at the
+  current character of each of them, is compared with strcmp().
+
+  @param  cs  character set handler
+  @param  s   First 0-terminated string to compare
+  @param  t   Second 0-terminated string to compare
+
+  @return Comparison result.
+    @retval negative number if s < t
+    @retval positive number if s > t
+    @retval 0 if the strings are equal
+*/
+
+static int
+my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
+{
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  while (s[0] && t[0])
+  {
+    my_wc_t s_wc,t_wc;
+    /*
+      Remember where the current character of 's' starts. If 't'
+      turns out to be ill-formed after 's' has already been advanced,
+      the byte-wise fallback must start at the same character position
+      in both strings.
+    */
+    const char *s_char= s;
+
+    if ((uchar) s[0] < 128)
+    {
+      /*
+        s[0] is between 0 and 127.
+        It represents a single byte character.
+        Convert it into weight according to collation.
+      */
+      s_wc= plane00[(uchar) s[0]].tolower;
+      s++;
+    }
+    else
+    {
+      int res= my_mb_wc_utf8mb4_no_range(cs, &s_wc, (const uchar*) s);
+
+      /*
+        In the case of wrong multibyte sequence we will
+        call strcmp() for byte-to-byte comparison.
+      */
+      if (res <= 0)
+        return strcmp(s, t);
+      s+= res;
+
+      my_tolower_utf8mb4(uni_plane, &s_wc);
+    }
+
+
+    /* Do the same for the second string */
+
+    if ((uchar) t[0] < 128)
+    {
+      /* Convert single byte character into weight */
+      t_wc= plane00[(uchar) t[0]].tolower;
+      t++;
+    }
+    else
+    {
+      int res= my_mb_wc_utf8mb4_no_range(cs, &t_wc, (const uchar*) t);
+      if (res <= 0)
+        return strcmp(s_char, t);  /* 's' was advanced: rewind it */
+      t+= res;
+
+      my_tolower_utf8mb4(uni_plane, &t_wc);
+    }
+
+    /* Now we have two weights, let's compare them */
+    if ( s_wc != t_wc )
+      return ((int) s_wc) - ((int) t_wc);
+  }
+  return ((int) (uchar) s[0]) - ((int) (uchar) t[0]);
+}
+
+
+/*
+ Wildcard comparison for utf8mb4: forwards all arguments to
+ my_wildcmp_unicode(), passing cs->caseinfo as the weight tables.
+*/
+static int
+my_wildcmp_utf8mb4(CHARSET_INFO *cs,
+ const char *str, const char *strend,
+ const char *wildstr, const char *wildend,
+ int escape, int w_one, int w_many)
+{
+ return my_wildcmp_unicode(cs, str, strend, wildstr, wildend,
+ escape, w_one, w_many, cs->caseinfo);
+}
+
+
+/*
+ Sort key length for a source of "len" bytes.
+ Computed as (len * 2 + 2) / 4, i.e. half of len, rounded to the
+ nearest integer.
+ NOTE(review): presumably 2 key bytes per character of up to 4 source
+ bytes - confirm against the callers.
+*/
+static size_t
+my_strnxfrmlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), size_t len)
+{
+ /* TODO: fix when working on WL "Unicode new version" */
+ return (len * 2 + 2) / 4;
+}
+
+
+/*
+  Check whether a multi-byte character starts at "b".
+
+  Returns the length in bytes of the character in [b, e) if it is a
+  valid sequence of more than one byte, 0 otherwise.
+*/
+static uint
+my_ismbchar_utf8mb4(CHARSET_INFO *cs, const char *b, const char *e)
+{
+  my_wc_t unused_wc;
+  int nbytes= my_mb_wc_utf8mb4(cs, &unused_wc,
+                               (const uchar*) b, (const uchar*) e);
+  if (nbytes > 1)
+    return nbytes;
+  return 0;
+}
+
+
+/*
+  Length in bytes of a utf8mb4 character, derived from its first
+  byte "c". Returns 0 if "c" cannot start a character.
+
+  NOTE(review): lead bytes 0xF5..0xF7 are reported as 4-byte heads
+  here, while my_mb_wc_utf8mb4() rejects them as illegal sequences.
+  The range is kept as is for compatibility - confirm with the callers
+  before narrowing it.
+*/
+static uint
+my_mbcharlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), uint c)
+{
+  if (c < 0x80)
+    return 1;
+  if (c < 0xc2)
+    return 0; /* Illegal mb head */
+  if (c < 0xe0)
+    return 2;
+  if (c < 0xf0)
+    return 3;
+  if (c < 0xf8)
+    return 4;
+  return 0; /* Illegal mb head */
+}
+
+
+/*
+ Collation handler used by my_charset_utf8mb4_general_ci.
+ Comparison, hashing and wildcard matching use the utf8mb4 routines
+ of this file; sort keys are produced by the shared
+ my_strnxfrm_unicode().
+*/
+static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler=
+{
+ NULL, /* init */
+ my_strnncoll_utf8mb4,
+ my_strnncollsp_utf8mb4,
+ my_strnxfrm_unicode,
+ my_strnxfrmlen_utf8mb4,
+ my_like_range_mb,
+ my_wildcmp_utf8mb4,
+ my_strcasecmp_utf8mb4,
+ my_instr_mb,
+ my_hash_sort_utf8mb4,
+ my_propagate_complex
+};
+
+
+/*
+ Collation handler used by my_charset_utf8mb4_bin.
+ Comparison, hashing and wildcard matching are delegated to the
+ generic multi-byte binary routines (my_*_mb_bin); sort keys are
+ produced by the shared my_strnxfrm_unicode().
+*/
+static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler =
+{
+ NULL, /* init */
+ my_strnncoll_mb_bin,
+ my_strnncollsp_mb_bin,
+ my_strnxfrm_unicode,
+ my_strnxfrmlen_utf8mb4,
+ my_like_range_mb,
+ my_wildcmp_mb_bin,
+ my_strcasecmp_mb_bin,
+ my_instr_mb,
+ my_hash_sort_mb_bin,
+ my_propagate_simple
+};
+
+
+/*
+ Character set handler shared by both utf8mb4 collations defined in
+ this file. Decoding, encoding and case conversion use the utf8mb4
+ routines of this file; the remaining slots reuse the generic
+ multi-byte (my_*_mb) and 8-bit (my_*_8bit) implementations.
+*/
+MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
+{
+ NULL, /* init */
+ my_ismbchar_utf8mb4,
+ my_mbcharlen_utf8mb4,
+ my_numchars_mb,
+ my_charpos_mb,
+ my_well_formed_len_mb,
+ my_lengthsp_8bit,
+ my_numcells_mb,
+ my_mb_wc_utf8mb4,
+ my_wc_mb_utf8mb4,
+ my_mb_ctype_mb,
+ my_caseup_str_utf8mb4,
+ my_casedn_str_utf8mb4,
+ my_caseup_utf8mb4,
+ my_casedn_utf8mb4,
+ my_snprintf_8bit,
+ my_long10_to_str_8bit,
+ my_longlong10_to_str_8bit,
+ my_fill_8bit,
+ my_strntol_8bit,
+ my_strntoul_8bit,
+ my_strntoll_8bit,
+ my_strntoull_8bit,
+ my_strntod_8bit,
+ my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
+ my_scan_8bit
+};
+
+
+
+/*
+ Compiled-in definition of the MY_UTF8MB4_GENERAL_CI collation
+ (number 45), flagged as the primary collation of MY_UTF8MB4.
+ Characters take 1 to 4 bytes; case and sort information comes from
+ my_unicase_default.
+*/
+CHARSET_INFO my_charset_utf8mb4_general_ci=
+{
+ 45,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_UNICODE_SUPPLEMENT, /* state */
+ MY_UTF8MB4, /* cs name */
+ MY_UTF8MB4_GENERAL_CI,/* name */
+ "UTF-8 Unicode", /* comment */
+ NULL, /* tailoring */
+ ctype_utf8mb4, /* ctype */
+ to_lower_utf8mb4, /* to_lower */
+ to_upper_utf8mb4, /* to_upper */
+ to_upper_utf8mb4, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 1, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 0, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_utf8mb4_general_ci_handler
+};
+
+
+/*
+ Compiled-in definition of the MY_UTF8MB4_BIN collation (number 46),
+ flagged MY_CS_BINSORT. Characters take 1 to 4 bytes; no single-byte
+ sort_order table is set.
+*/
+CHARSET_INFO my_charset_utf8mb4_bin=
+{
+ 46,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_UNICODE_SUPPLEMENT, /* state */
+ MY_UTF8MB4, /* cs name */
+ MY_UTF8MB4_BIN, /* name */
+ "UTF-8 Unicode", /* comment */
+ NULL, /* tailoring */
+ ctype_utf8mb4, /* ctype */
+ to_lower_utf8mb4, /* to_lower */
+ to_upper_utf8mb4, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 1, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 0, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_utf8mb4_bin_handler
+};
+
+#endif /* HAVE_CHARSET_utf8mb4 */