diff options
author | Alexander Barkov <bar@mysql.com> | 2010-02-24 13:52:59 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mysql.com> | 2010-02-24 13:52:59 +0400 |
commit | c803a7c460e7317b421b9ca15e5caff350245773 (patch) | |
tree | 3415439db845a6474471e4a6b8b460e3256965e7 /strings | |
parent | 4836199d701af05a9c89563b642c51ee5640e804 (diff) | |
parent | 8994fad85db18b4ab31fc67e2f8e15f1203d0b1a (diff) | |
download | mariadb-git-c803a7c460e7317b421b9ca15e5caff350245773.tar.gz |
Merging WL#1213 into mysql-next-mr-bar2
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-mb.c | 38 | ||||
-rw-r--r-- | strings/ctype-uca.c | 1986 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 3100 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 1054 |
4 files changed, 5417 insertions, 761 deletions
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 252c5a08b8c..98b598c3c2c 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -467,10 +467,11 @@ uint my_instr_mb(CHARSET_INFO *cs, /* BINARY collations handlers for MB charsets */ -static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) +int +my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) { size_t len=min(slen,tlen); int cmp= memcmp(s,t,len); @@ -503,10 +504,11 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), 0 if strings are equal */ -static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - const uchar *a, size_t a_length, - const uchar *b, size_t b_length, - my_bool diff_if_only_endspace_difference) +int +my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), + const uchar *a, size_t a_length, + const uchar *b, size_t b_length, + my_bool diff_if_only_endspace_difference) { const uchar *end; size_t length; @@ -562,14 +564,17 @@ static size_t my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)), } -static int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - const char *s, const char *t) +int +my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), + const char *s, const char *t) { return strcmp(s,t); } -static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), - const uchar *key, size_t len,ulong *nr1, ulong *nr2) + +void +my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), + const uchar *key, size_t len,ulong *nr1, ulong *nr2) { const uchar *pos = key; @@ -787,10 +792,11 @@ fill_max_and_min: } -static int my_wildcmp_mb_bin(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) +int 
+my_wildcmp_mb_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) { int result= -1; /* Not found, using wildcards */ diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 56f3ddccae4..7dbec5a1321 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -9464,4 +9464,1990 @@ CHARSET_INFO my_charset_utf8_sinhala_uca_ci= #endif /* HAVE_CHARSET_utf8 */ + +#ifdef HAVE_CHARSET_utf8mb4 + +extern MY_CHARSET_HANDLER my_charset_utf8mb4_handler; + +#define MY_CS_UTF8MB4_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_UNICODE_SUPPLEMENT) + +CHARSET_INFO my_charset_utf8mb4_unicode_ci= +{ + 224,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_unicode_ci",/* name */ + "", /* comment */ + "", /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + uca_length, /* sort_order */ + NULL, /* contractions */ + uca_weight, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + + +CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci= +{ + 225,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_icelandic_ci",/* name */ + "", /* comment */ + icelandic, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* 
ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci= +{ + 226,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_latvian_ci", /* name */ + "", /* comment */ + latvian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci= +{ + 227,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_romanian_ci", /* name */ + "", /* comment */ + romanian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + 
+CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci= +{ + 228,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_slovenian_ci",/* name */ + "", /* comment */ + slovenian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_polish_uca_ci= +{ + 229,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_polish_ci", /* name */ + "", /* comment */ + polish, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci= +{ + 230,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_estonian_ci", /* name */ + "", /* comment */ + estonian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* 
contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci= +{ + 231,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_spanish_ci", /* name */ + "", /* comment */ + spanish, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci= +{ + 232,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_swedish_ci", /* name */ + "", /* comment */ + swedish, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, 
/* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci= +{ + 233,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_turkish_ci", /* name */ + "", /* comment */ + turkish, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_turkish, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 2, /* caseup_multiply */ + 2, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_czech_uca_ci= +{ + 234,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_czech_ci", /* name */ + "", /* comment */ + czech, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + + +CHARSET_INFO my_charset_utf8mb4_danish_uca_ci= +{ + 235,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_danish_ci", /* name */ + "", /* 
comment */ + danish, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci= +{ + 236,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_lithuanian_ci",/* name */ + "", /* comment */ + lithuanian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci= +{ + 237,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_slovak_ci", /* name */ + "", /* comment */ + slovak, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* 
strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci= +{ + 238,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_spanish2_ci", /* name */ + "", /* comment */ + spanish2, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_roman_uca_ci= +{ + 239,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_roman_ci", /* name */ + "", /* comment */ + roman, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO 
my_charset_utf8mb4_persian_uca_ci= +{ + 240,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_persian_ci", /* name */ + "", /* comment */ + persian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci= +{ + 241,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_esperanto_ci",/* name */ + "", /* comment */ + esperanto, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci= +{ + 242,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_hungarian_ci",/* name */ + "", /* comment */ + hungarian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* 
contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci= +{ + 243,0,0, /* number */ + MY_CS_UTF8MB4_UCA_FLAGS,/* state */ + MY_UTF8MB4, /* csname */ + MY_UTF8MB4 "_sinhala_ci",/* name */ + "", /* comment */ + sinhala, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_any_uca_handler +}; + +#endif /* HAVE_CHARSET_utf8mb4 */ + + +#ifdef HAVE_CHARSET_utf32 + +MY_COLLATION_HANDLER my_collation_utf32_uca_handler = +{ + my_coll_init_uca, /* init */ + my_strnncoll_any_uca, + my_strnncollsp_any_uca, + my_strnxfrm_any_uca, + my_strnxfrmlen_simple, + my_like_range_utf32, + my_wildcmp_uca, + NULL, + my_instr_mb, + my_hash_sort_any_uca, + my_propagate_complex +}; + +extern MY_CHARSET_HANDLER my_charset_utf32_handler; + +#define MY_CS_UTF32_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII) + +CHARSET_INFO my_charset_utf32_unicode_ci= +{ + 160,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_unicode_ci", /* 
name */ + "", /* comment */ + "", /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + uca_length, /* sort_order */ + NULL, /* contractions */ + uca_weight, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + + +CHARSET_INFO my_charset_utf32_icelandic_uca_ci= +{ + 161,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_icelandic_ci",/* name */ + "", /* comment */ + icelandic, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_latvian_uca_ci= +{ + 162,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_latvian_ci", /* name */ + "", /* comment */ + latvian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply 
*/ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_romanian_uca_ci= +{ + 163,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_romanian_ci", /* name */ + "", /* comment */ + romanian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_slovenian_uca_ci= +{ + 164,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_slovenian_ci",/* name */ + "", /* comment */ + slovenian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_polish_uca_ci= +{ + 165,0,0, /* number */ + 
MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_polish_ci", /* name */ + "", /* comment */ + polish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_estonian_uca_ci= +{ + 166,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_estonian_ci", /* name */ + "", /* comment */ + estonian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_spanish_uca_ci= +{ + 167,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_spanish_ci", /* name */ + "", /* comment */ + spanish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + 
NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_swedish_uca_ci= +{ + 168,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_swedish_ci", /* name */ + "", /* comment */ + swedish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_turkish_uca_ci= +{ + 169,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_turkish_ci", /* name */ + "", /* comment */ + turkish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_turkish, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO 
my_charset_utf32_czech_uca_ci= +{ + 170,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_czech_ci", /* name */ + "", /* comment */ + czech, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + + +CHARSET_INFO my_charset_utf32_danish_uca_ci= +{ + 171,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_danish_ci", /* name */ + "", /* comment */ + danish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_lithuanian_uca_ci= +{ + 172,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_lithuanian_ci",/* name */ + "", /* comment */ + lithuanian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + 
NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_slovak_uca_ci= +{ + 173,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_slovak_ci", /* name */ + "", /* comment */ + slovak, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_spanish2_uca_ci= +{ + 174,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_spanish2_ci", /* name */ + "", /* comment */ + spanish2, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + 
&my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_roman_uca_ci= +{ + 175,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_roman_ci", /* name */ + "", /* comment */ + roman, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_persian_uca_ci= +{ + 176,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_persian_ci", /* name */ + "", /* comment */ + persian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_esperanto_uca_ci= +{ + 177,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_esperanto_ci",/* name */ + "", /* comment */ + esperanto, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + 
NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_hungarian_uca_ci= +{ + 178,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_hungarian_ci",/* name */ + "", /* comment */ + hungarian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +CHARSET_INFO my_charset_utf32_sinhala_uca_ci= +{ + 179,0,0, /* number */ + MY_CS_UTF32_UCA_FLAGS,/* state */ + "utf32", /* csname */ + "utf32_sinhala_ci", /* name */ + "", /* comment */ + sinhala, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' 
', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_uca_handler +}; + +#endif /* HAVE_CHARSET_utf32 */ + + +#ifdef HAVE_CHARSET_utf16 + + +MY_COLLATION_HANDLER my_collation_utf16_uca_handler = +{ + my_coll_init_uca, /* init */ + my_strnncoll_any_uca, + my_strnncollsp_any_uca, + my_strnxfrm_any_uca, + my_strnxfrmlen_simple, + my_like_range_utf16, + my_wildcmp_uca, + NULL, + my_instr_mb, + my_hash_sort_any_uca, + my_propagate_complex +}; + +extern MY_CHARSET_HANDLER my_charset_utf16_handler; + +#define MY_CS_UTF16_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII) + +CHARSET_INFO my_charset_utf16_unicode_ci= +{ + 101,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* csname */ + "utf16_unicode_ci", /* name */ + "", /* comment */ + "", /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + uca_length, /* sort_order */ + NULL, /* contractions */ + uca_weight, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + + +CHARSET_INFO my_charset_utf16_icelandic_uca_ci= +{ + 102,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* csname */ + "utf16_icelandic_ci",/* name */ + "", /* comment */ + icelandic, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ 
+ 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_latvian_uca_ci= +{ + 103,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_latvian_ci", /* name */ + "", /* comment */ + latvian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_romanian_uca_ci= +{ + 104,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_romanian_ci", /* name */ + "", /* comment */ + romanian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_slovenian_uca_ci= +{ + 105,0,0, /* number */ + 
MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_slovenian_ci",/* name */ + "", /* comment */ + slovenian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_polish_uca_ci= +{ + 106,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_polish_ci", /* name */ + "", /* comment */ + polish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_estonian_uca_ci= +{ + 107,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_estonian_ci", /* name */ + "", /* comment */ + estonian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo 
*/ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_spanish_uca_ci= +{ + 108,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_spanish_ci", /* name */ + "", /* comment */ + spanish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_swedish_uca_ci= +{ + 109,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_swedish_ci", /* name */ + "", /* comment */ + swedish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + 
+CHARSET_INFO my_charset_utf16_turkish_uca_ci= +{ + 110,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_turkish_ci", /* name */ + "", /* comment */ + turkish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_turkish, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_czech_uca_ci= +{ + 111,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_czech_ci", /* name */ + "", /* comment */ + czech, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + + +CHARSET_INFO my_charset_utf16_danish_uca_ci= +{ + 112,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_danish_ci", /* name */ + "", /* comment */ + danish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni 
*/ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_lithuanian_uca_ci= +{ + 113,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_lithuanian_ci",/* name */ + "", /* comment */ + lithuanian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_slovak_uca_ci= +{ + 114,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_slovak_ci", /* name */ + "", /* comment */ + slovak, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + 
&my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_spanish2_uca_ci= +{ + 115,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_spanish2_ci",/* name */ + "", /* comment */ + spanish2, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_roman_uca_ci= +{ + 116,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_roman_ci", /* name */ + "", /* comment */ + roman, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_persian_uca_ci= +{ + 117,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_persian_ci", /* name */ + "", /* comment */ + persian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + 
NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_esperanto_uca_ci= +{ + 118,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_esperanto_ci",/* name */ + "", /* comment */ + esperanto, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_hungarian_uca_ci= +{ + 119,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_hungarian_ci",/* name */ + "", /* comment */ + hungarian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default,/* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ 
+ ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +CHARSET_INFO my_charset_utf16_sinhala_uca_ci= +{ + 120,0,0, /* number */ + MY_CS_UTF16_UCA_FLAGS,/* state */ + "utf16", /* cs name */ + "utf16_sinhala_ci",/* name */ + "", /* comment */ + sinhala, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default,/* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_uca_handler +}; + +#endif /* HAVE_CHARSET_utf16 */ + + + #endif /* HAVE_UCA_COLLATIONS */ diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 069131ba354..6de0ea8f7e8 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -25,479 +25,81 @@ #include <stdarg.h> -#ifdef HAVE_CHARSET_ucs2 - -#ifndef EILSEQ -#define EILSEQ ENOENT +#if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2) +#define HAVE_CHARSET_mb2 #endif -static uchar ctype_ucs2[] = { - 0, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16, - 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, - 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static uchar to_lower_ucs2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95, - 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, - 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 -}; +#if defined(HAVE_CHARSET_mb2) || defined(HAVE_CHARSET_utf32) +#define HAVE_CHARSET_mb2_or_mb4 +#endif -static uchar to_upper_ucs2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 
83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127, - 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 -}; +#ifndef EILSEQ +#define EILSEQ ENOENT +#endif -static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)), - my_wc_t * pwc, const uchar *s, const uchar *e) -{ - if (s+2 > e) /* Need 2 characters */ - return MY_CS_TOOSMALL2; - - *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]); - return 2; -} +#undef ULONGLONG_MAX +#define ULONGLONG_MAX (~(ulonglong) 0) +#define MAX_NEGATIVE_NUMBER ((ulonglong) LL(0x8000000000000000)) +#define INIT_CNT 9 +#define LFACTOR ULL(1000000000) +#define LFACTOR1 ULL(10000000000) +#define LFACTOR2 ULL(100000000000) -static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) , - my_wc_t wc, uchar *r, uchar *e) -{ - if ( r+2 > e ) - return MY_CS_TOOSMALL2; - - r[0]= (uchar) (wc >> 8); - r[1]= (uchar) (wc & 0xFF); - return 2; -} +static unsigned long lfactor[9]= +{ 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L }; +#define REPLACEMENT_CHAR 0xFFFD; -static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen, - char *dst __attribute__((unused)), - size_t dstlen __attribute__((unused))) -{ - my_wc_t wc; - int res; - char *srcend= src + srclen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - DBUG_ASSERT(src == dst && srclen == dstlen); - - while ((src < srcend) && - (res= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0) - { - int plane= (wc>>8) & 0xFF; - wc= uni_plane[plane] ? 
uni_plane[plane][wc & 0xFF].toupper : wc; - if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend)) - break; - src+= res; - } - return srclen; -} -static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen, - ulong *n1, ulong *n2) +#ifdef HAVE_CHARSET_mb2_or_mb4 +static inline int +my_bincmp(const uchar *s, const uchar *se, + const uchar *t, const uchar *te) { - my_wc_t wc; - int res; - const uchar *e=s+slen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - - while (e > s+1 && e[-1] == ' ' && e[-2] == '\0') - e-= 2; - - while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0) - { - int plane = (wc>>8) & 0xFF; - wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc; - n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8); - n2[0]+=3; - n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8); - n2[0]+=3; - s+=res; - } + int slen= (int) (se - s), tlen= (int) (te - t); + int len= min(slen, tlen); + int cmp= memcmp(s, t, len); + return cmp ? cmp : slen - tlen; } -static size_t my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)), - char * s __attribute__((unused))) +static size_t +my_caseup_str_mb2_or_mb4(CHARSET_INFO * cs __attribute__((unused)), + char * s __attribute__((unused))) { + DBUG_ASSERT(0); return 0; } -static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen, - char *dst __attribute__((unused)), - size_t dstlen __attribute__((unused))) -{ - my_wc_t wc; - int res; - char *srcend= src + srclen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - DBUG_ASSERT(src == dst && srclen == dstlen); - - while ((src < srcend) && - (res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0) - { - int plane= (wc>>8) & 0xFF; - wc= uni_plane[plane] ? 
uni_plane[plane][wc & 0xFF].tolower : wc; - if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend)) - break; - src+= res; - } - return srclen; -} - - -static size_t my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char * s __attribute__((unused))) +static size_t +my_casedn_str_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)), + char * s __attribute__((unused))) { + DBUG_ASSERT(0); return 0; } -static int my_strnncoll_ucs2(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) +static int +my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)), + const char *s __attribute__((unused)), + const char *t __attribute__((unused))) { - int s_res,t_res; - my_wc_t UNINIT_VAR(s_wc),t_wc; - const uchar *se=s+slen; - const uchar *te=t+tlen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - - while ( s < se && t < te ) - { - int plane; - s_res=my_ucs2_uni(cs,&s_wc, s, se); - t_res=my_ucs2_uni(cs,&t_wc, t, te); - - if ( s_res <= 0 || t_res <= 0 ) - { - /* Incorrect string, compare by char value */ - return ((int)s[0]-(int)t[0]); - } - - plane=(s_wc>>8) & 0xFF; - s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; - plane=(t_wc>>8) & 0xFF; - t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc; - if ( s_wc != t_wc ) - { - return s_wc > t_wc ? 1 : -1; - } - - s+=s_res; - t+=t_res; - } - return (int) (t_is_prefix ? t-te : ((se-s) - (te-t))); -} - -/* - Compare strings, discarding end space - - SYNOPSIS - my_strnncollsp_ucs2() - cs character set handler - a First string to compare - a_length Length of 'a' - b Second string to compare - b_length Length of 'b' - - IMPLEMENTATION - If one string is shorter as the other, then we space extend the other - so that the strings have equal length. 
- - This will ensure that the following things hold: - - "a" == "a " - "a\0" < "a" - "a\0" < "a " - - RETURN - < 0 a < b - = 0 a == b - > 0 a > b -*/ - -static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference - __attribute__((unused))) -{ - const uchar *se, *te; - size_t minlen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - - /* extra safety to make sure the lengths are even numbers */ - slen&= ~1; - tlen&= ~1; - - se= s + slen; - te= t + tlen; - - for (minlen= min(slen, tlen); minlen; minlen-= 2) - { - int s_wc = uni_plane[s[0]] ? (int) uni_plane[s[0]][s[1]].sort : - (((int) s[0]) << 8) + (int) s[1]; - - int t_wc = uni_plane[t[0]] ? (int) uni_plane[t[0]][t[1]].sort : - (((int) t[0]) << 8) + (int) t[1]; - if ( s_wc != t_wc ) - return s_wc > t_wc ? 1 : -1; - - s+= 2; - t+= 2; - } - - if (slen != tlen) - { - int swap= 1; - if (slen < tlen) - { - s= t; - se= te; - swap= -1; - } - - for ( ; s < se ; s+= 2) - { - if (s[0] || s[1] != ' ') - return (s[0] == 0 && s[1] < ' ') ? -swap : swap; - } - } + DBUG_ASSERT(0); return 0; } -static int my_strncasecmp_ucs2(CHARSET_INFO *cs, - const char *s, const char *t, size_t len) -{ - int s_res,t_res; - my_wc_t UNINIT_VAR(s_wc),t_wc; - const char *se=s+len; - const char *te=t+len; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - - while ( s < se && t < te ) - { - int plane; - - s_res=my_ucs2_uni(cs,&s_wc, (const uchar*)s, (const uchar*)se); - t_res=my_ucs2_uni(cs,&t_wc, (const uchar*)t, (const uchar*)te); - - if ( s_res <= 0 || t_res <= 0 ) - { - /* Incorrect string, compare by char value */ - return ((int)s[0]-(int)t[0]); - } - - plane=(s_wc>>8) & 0xFF; - s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].tolower : s_wc; - - plane=(t_wc>>8) & 0xFF; - t_wc = uni_plane[plane] ? 
uni_plane[plane][t_wc & 0xFF].tolower : t_wc; - - if ( s_wc != t_wc ) - return ((int) s_wc) - ((int) t_wc); - - s+=s_res; - t+=t_res; - } - return (int) ( (se-s) - (te-t) ); -} - - -static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t) -{ - size_t s_len= strlen(s); - size_t t_len= strlen(t); - size_t len = (s_len > t_len) ? s_len : t_len; - return my_strncasecmp_ucs2(cs, s, t, len); -} - - -static size_t my_strnxfrm_ucs2(CHARSET_INFO *cs, - uchar *dst, size_t dstlen, const uchar *src, - size_t srclen) -{ - my_wc_t wc; - int res; - int plane; - uchar *de = dst + dstlen; - const uchar *se = src + srclen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - - while( src < se && dst < de ) - { - if ((res=my_ucs2_uni(cs,&wc, src, se))<0) - { - break; - } - src+=res; - srclen-=res; - - plane=(wc>>8) & 0xFF; - wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc; - - if ((res=my_uni_ucs2(cs,wc,dst,de)) <0) - { - break; - } - dst+=res; - } - if (dst < de) - cs->cset->fill(cs, (char*) dst, (size_t) (de - dst), ' '); - return dstlen; -} - - -static uint my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const char *b __attribute__((unused)), - const char *e __attribute__((unused))) -{ - return 2; -} - - -static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) , - uint c __attribute__((unused))) -{ - return 2; -} - - -static int my_vsnprintf_ucs2(char *dst, size_t n, const char* fmt, va_list ap) +static long +my_strntol_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t l, int base, + char **endptr, int *err) { - char *start=dst, *end=dst+n-1; - for (; *fmt ; fmt++) - { - if (fmt[0] != '%') - { - if (dst == end) /* End of buffer */ - break; - - *dst++='\0'; *dst++= *fmt; /* Copy ordinary char */ - continue; - } - - fmt++; - - /* Skip if max size is used (to be compatible with printf) */ - while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' 
|| *fmt == '-') - fmt++; - - if (*fmt == 'l') - fmt++; - - if (*fmt == 's') /* String parameter */ - { - reg2 char *par = va_arg(ap, char *); - size_t plen; - size_t left_len = (size_t)(end-dst); - if (!par) par = (char*)"(null)"; - plen= strlen(par); - if (left_len <= plen*2) - plen = left_len/2 - 1; - - for ( ; plen ; plen--, dst+=2, par++) - { - dst[0]='\0'; - dst[1]=par[0]; - } - continue; - } - else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */ - { - register int iarg; - char nbuf[16]; - char *pbuf=nbuf; - - if ((size_t) (end-dst) < 32) - break; - iarg = va_arg(ap, int); - if (*fmt == 'd') - int10_to_str((long) iarg, nbuf, -10); - else - int10_to_str((long) (uint) iarg,nbuf,10); - - for (; pbuf[0]; pbuf++) - { - *dst++='\0'; - *dst++=*pbuf; - } - continue; - } - - /* We come here on '%%', unknown code or too long parameter */ - if (dst == end) - break; - *dst++='\0'; - *dst++='%'; /* % used as % or unknown code */ - } - - DBUG_ASSERT(dst <= end); - *dst='\0'; /* End of errmessage */ - return (size_t) (dst - start); -} - -static size_t my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char* to, size_t n, const char* fmt, ...) -{ - va_list args; - va_start(args,fmt); - return my_vsnprintf_ucs2(to, n, fmt, args); -} - - -long my_strntol_ucs2(CHARSET_INFO *cs, - const char *nptr, size_t l, int base, - char **endptr, int *err) -{ - int negative=0; + int negative= 0; int overflow; int cnv; my_wc_t wc; @@ -511,7 +113,7 @@ long my_strntol_ucs2(CHARSET_INFO *cs, *err= 0; do { - if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0) + if ((cnv= cs->cset->mb_wc(cs, &wc, s, e))>0) { switch (wc) { @@ -524,12 +126,12 @@ long my_strntol_ucs2(CHARSET_INFO *cs, } else /* No more characters or bad multibyte sequence */ { - if (endptr !=NULL ) - *endptr = (char*)s; - err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM; + if (endptr != NULL ) + *endptr= (char*) s; + err[0]= (cnv==MY_CS_ILSEQ) ? 
EILSEQ : EDOM; return 0; } - s+=cnv; + s+= cnv; } while (1); bs: @@ -539,39 +141,39 @@ bs: base = 10; #endif - overflow = 0; - res = 0; - save = s; - cutoff = ((uint32)~0L) / (uint32) base; - cutlim = (uint) (((uint32)~0L) % (uint32) base); + overflow= 0; + res= 0; + save= s; + cutoff= ((uint32)~0L) / (uint32) base; + cutlim= (uint) (((uint32)~0L) % (uint32) base); do { - if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0) + if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0) { - s+=cnv; - if ( wc>='0' && wc<='9') - wc -= '0'; - else if ( wc>='A' && wc<='Z') - wc = wc - 'A' + 10; - else if ( wc>='a' && wc<='z') - wc = wc - 'a' + 10; + s+= cnv; + if (wc >= '0' && wc <= '9') + wc-= '0'; + else if (wc >= 'A' && wc <= 'Z') + wc= wc - 'A' + 10; + else if (wc >= 'a' && wc <= 'z') + wc= wc - 'a' + 10; else break; if ((int)wc >= base) break; if (res > cutoff || (res == cutoff && wc > cutlim)) - overflow = 1; + overflow= 1; else { - res *= (uint32) base; - res += wc; + res*= (uint32) base; + res+= wc; } } - else if (cnv==MY_CS_ILSEQ) + else if (cnv == MY_CS_ILSEQ) { if (endptr !=NULL ) - *endptr = (char*)s; - err[0]=EILSEQ; + *endptr = (char*) s; + err[0]= EILSEQ; return 0; } else @@ -586,21 +188,21 @@ bs: if (s == save) { - err[0]=EDOM; + err[0]= EDOM; return 0L; } if (negative) { if (res > (uint32) INT_MIN32) - overflow = 1; + overflow= 1; } else if (res > INT_MAX32) - overflow = 1; + overflow= 1; if (overflow) { - err[0]=ERANGE; + err[0]= ERANGE; return negative ? 
INT_MIN32 : INT_MAX32; } @@ -608,11 +210,12 @@ bs: } -ulong my_strntoul_ucs2(CHARSET_INFO *cs, - const char *nptr, size_t l, int base, - char **endptr, int *err) +static ulong +my_strntoul_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t l, int base, + char **endptr, int *err) { - int negative=0; + int negative= 0; int overflow; int cnv; my_wc_t wc; @@ -620,13 +223,13 @@ ulong my_strntoul_ucs2(CHARSET_INFO *cs, register uint32 cutoff; register uint32 res; register const uchar *s= (const uchar*) nptr; - register const uchar *e= (const uchar*) nptr+l; + register const uchar *e= (const uchar*) nptr + l; const uchar *save; *err= 0; do { - if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0) + if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0) { switch (wc) { @@ -640,11 +243,11 @@ ulong my_strntoul_ucs2(CHARSET_INFO *cs, else /* No more characters or bad multibyte sequence */ { if (endptr !=NULL ) - *endptr = (char*)s; - err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM; + *endptr= (char*)s; + err[0]= (cnv == MY_CS_ILSEQ) ? 
EILSEQ : EDOM; return 0; } - s+=cnv; + s+= cnv; } while (1); bs: @@ -654,40 +257,40 @@ bs: base = 10; #endif - overflow = 0; - res = 0; - save = s; - cutoff = ((uint32)~0L) / (uint32) base; - cutlim = (uint) (((uint32)~0L) % (uint32) base); + overflow= 0; + res= 0; + save= s; + cutoff= ((uint32)~0L) / (uint32) base; + cutlim= (uint) (((uint32)~0L) % (uint32) base); do { - if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0) + if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0) { - s+=cnv; - if ( wc>='0' && wc<='9') - wc -= '0'; - else if ( wc>='A' && wc<='Z') - wc = wc - 'A' + 10; - else if ( wc>='a' && wc<='z') - wc = wc - 'a' + 10; + s+= cnv; + if (wc >= '0' && wc <= '9') + wc-= '0'; + else if (wc >= 'A' && wc <= 'Z') + wc= wc - 'A' + 10; + else if (wc >= 'a' && wc <= 'z') + wc= wc - 'a' + 10; else break; - if ((int)wc >= base) + if ((int) wc >= base) break; if (res > cutoff || (res == cutoff && wc > cutlim)) overflow = 1; else { - res *= (uint32) base; - res += wc; + res*= (uint32) base; + res+= wc; } } - else if (cnv==MY_CS_ILSEQ) + else if (cnv == MY_CS_ILSEQ) { - if (endptr !=NULL ) - *endptr = (char*)s; - err[0]=EILSEQ; + if (endptr != NULL ) + *endptr= (char*)s; + err[0]= EILSEQ; return 0; } else @@ -698,17 +301,17 @@ bs: } while(1); if (endptr != NULL) - *endptr = (char *) s; + *endptr= (char *) s; if (s == save) { - err[0]=EDOM; + err[0]= EDOM; return 0L; } if (overflow) { - err[0]=(ERANGE); + err[0]= (ERANGE); return (~(uint32) 0); } @@ -716,10 +319,10 @@ bs: } - -longlong my_strntoll_ucs2(CHARSET_INFO *cs, - const char *nptr, size_t l, int base, - char **endptr, int *err) +static longlong +my_strntoll_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t l, int base, + char **endptr, int *err) { int negative=0; int overflow; @@ -832,13 +435,12 @@ bs: } - - -ulonglong my_strntoull_ucs2(CHARSET_INFO *cs, - const char *nptr, size_t l, int base, - char **endptr, int *err) +static ulonglong +my_strntoull_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t l, int base, 
+ char **endptr, int *err) { - int negative=0; + int negative= 0; int overflow; int cnv; my_wc_t wc; @@ -846,13 +448,13 @@ ulonglong my_strntoull_ucs2(CHARSET_INFO *cs, register unsigned int cutlim; register ulonglong res; register const uchar *s= (const uchar*) nptr; - register const uchar *e= (const uchar*) nptr+l; + register const uchar *e= (const uchar*) nptr + l; const uchar *save; *err= 0; do { - if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0) + if ((cnv= cs->cset->mb_wc(cs,&wc,s,e)) > 0) { switch (wc) { @@ -942,49 +544,51 @@ bs: } -double my_strntod_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char *nptr, size_t length, - char **endptr, int *err) +static double +my_strntod_mb2_or_mb4(CHARSET_INFO *cs, + char *nptr, size_t length, + char **endptr, int *err) { char buf[256]; double res; - register char *b=buf; + register char *b= buf; register const uchar *s= (const uchar*) nptr; const uchar *end; my_wc_t wc; - int cnv; + int cnv; *err= 0; /* Cut too long strings */ if (length >= sizeof(buf)) - length= sizeof(buf)-1; - end= s+length; + length= sizeof(buf) - 1; + end= s + length; - while ((cnv=cs->cset->mb_wc(cs,&wc,s,end)) > 0) + while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0) { - s+=cnv; + s+= cnv; if (wc > (int) (uchar) 'e' || !wc) - break; /* Can't be part of double */ + break; /* Can't be part of double */ *b++= (char) wc; } *endptr= b; res= my_strtod(buf, endptr, err); - *endptr= nptr + (size_t) (*endptr- buf); + *endptr= nptr + cs->mbminlen * (size_t) (*endptr - buf); return res; } -ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const char *nptr, size_t length, - int unsign_fl, - char **endptr, int *err) +static ulonglong +my_strntoull10rnd_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t length, + int unsign_fl, + char **endptr, int *err) { - char buf[256], *b= buf; + char buf[256], *b= buf; ulonglong res; const uchar *end, *s= (const uchar*) nptr; my_wc_t wc; - int cnv; + int cnv; /* Cut too long strings */ if (length 
>= sizeof(buf)) @@ -1000,7 +604,7 @@ ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs __attribute__((unused)), } res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err); - *endptr= (char*) nptr + 2 * (size_t) (*endptr- buf); + *endptr= (char*) nptr + cs->mbminlen * (size_t) (*endptr - buf); return res; } @@ -1009,23 +613,24 @@ ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs __attribute__((unused)), This is a fast version optimized for the case of radix 10 / -10 */ -size_t my_l10tostr_ucs2(CHARSET_INFO *cs, - char *dst, size_t len, int radix, long int val) +static size_t +my_l10tostr_mb2_or_mb4(CHARSET_INFO *cs, + char *dst, size_t len, int radix, long int val) { char buffer[66]; register char *p, *db, *de; long int new_val; - int sl=0; + int sl= 0; unsigned long int uval = (unsigned long int) val; - p = &buffer[sizeof(buffer)-1]; - *p='\0'; + p= &buffer[sizeof(buffer) - 1]; + *p= '\0'; if (radix < 0) { if (val < 0) { - sl = 1; + sl= 1; /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). 
*/ uval = (unsigned long int)0 - uval; } @@ -1033,57 +638,58 @@ size_t my_l10tostr_ucs2(CHARSET_INFO *cs, new_val = (long) (uval / 10); *--p = '0'+ (char) (uval - (unsigned long) new_val * 10); - val = new_val; + val= new_val; while (val != 0) { - new_val=val/10; - *--p = '0' + (char) (val-new_val*10); + new_val= val / 10; + *--p= '0' + (char) (val - new_val * 10); val= new_val; } if (sl) { - *--p='-'; + *--p= '-'; } - for ( db=dst, de=dst+len ; (dst<de) && *p ; p++) + for ( db= dst, de= dst + len ; (dst < de) && *p ; p++) { - int cnvres=cs->cset->wc_mb(cs,(my_wc_t)p[0],(uchar*) dst, (uchar*) de); - if (cnvres>0) - dst+=cnvres; + int cnvres= cs->cset->wc_mb(cs,(my_wc_t)p[0],(uchar*) dst, (uchar*) de); + if (cnvres > 0) + dst+= cnvres; else break; } - return (int) (dst-db); + return (int) (dst - db); } -size_t my_ll10tostr_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char *dst, size_t len, int radix, longlong val) +static size_t +my_ll10tostr_mb2_or_mb4(CHARSET_INFO *cs, + char *dst, size_t len, int radix, longlong val) { char buffer[65]; register char *p, *db, *de; long long_val; - int sl=0; + int sl= 0; ulonglong uval= (ulonglong) val; if (radix < 0) { if (val < 0) { - sl=1; + sl= 1; /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). 
*/ uval = (ulonglong)0 - uval; } } - p = &buffer[sizeof(buffer)-1]; + p= &buffer[sizeof(buffer)-1]; *p='\0'; if (uval == 0) { - *--p='0'; + *--p= '0'; goto cnv; } @@ -1091,7 +697,7 @@ size_t my_ll10tostr_ucs2(CHARSET_INFO *cs __attribute__((unused)), { ulonglong quo= uval/(uint) 10; uint rem= (uint) (uval- quo* (uint) 10); - *--p = '0' + rem; + *--p= '0' + rem; uval= quo; } @@ -1099,44 +705,34 @@ size_t my_ll10tostr_ucs2(CHARSET_INFO *cs __attribute__((unused)), while (long_val != 0) { long quo= long_val/10; - *--p = (char) ('0' + (long_val - quo*10)); + *--p= (char) ('0' + (long_val - quo*10)); long_val= quo; } cnv: if (sl) { - *--p='-'; + *--p= '-'; } - for ( db=dst, de=dst+len ; (dst<de) && *p ; p++) + for ( db= dst, de= dst + len ; (dst < de) && *p ; p++) { - int cnvres=cs->cset->wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de); - if (cnvres>0) - dst+=cnvres; + int cnvres= cs->cset->wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de); + if (cnvres > 0) + dst+= cnvres; else break; } - return (int) (dst-db); + return (int) (dst -db); } +#endif /* HAVE_CHARSET_mb2_or_mb4 */ -#undef ULONGLONG_MAX -#define ULONGLONG_MAX (~(ulonglong) 0) -#define MAX_NEGATIVE_NUMBER ((ulonglong) LL(0x8000000000000000)) -#define INIT_CNT 9 -#define LFACTOR ULL(1000000000) -#define LFACTOR1 ULL(10000000000) -#define LFACTOR2 ULL(100000000000) -static unsigned long lfactor[9]= -{ - 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L -}; - - -longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const char *nptr, char **endptr, int *error) +#ifdef HAVE_CHARSET_mb2 +static longlong +my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)), + const char *nptr, char **endptr, int *error) { const char *s, *end, *start, *n_end, *true_end; uchar c; @@ -1162,11 +758,11 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)), goto no_conv; } - /* Check for a sign. */ + /* Check for a sign. 
*/ negative= 0; if (!s[0] && s[1] == '-') { - *error= -1; /* Mark as negative number */ + *error= -1; /* Mark as negative number */ negative= 1; s+= 2; if (s == end) @@ -1182,7 +778,7 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)), { s+= 2; if (s == end) - goto no_conv; + goto no_conv; } cutoff= ULONGLONG_MAX / LFACTOR2; cutoff2= ULONGLONG_MAX % LFACTOR2 / 100; @@ -1197,7 +793,7 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)), { s+= 2; if (s == end) - goto end_i; /* Return 0 */ + goto end_i; /* Return 0 */ } while (!s[0] && s[1] == '0'); n_end= s + 2 * INIT_CNT; @@ -1226,7 +822,7 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)), /* Handle next 9 digits and store them in j */ j= 0; - start= s; /* Used to know how much to shift i */ + start= s; /* Used to know how much to shift i */ n_end= true_end= s + 2 * INIT_CNT; if (n_end > end) n_end= end; @@ -1266,7 +862,7 @@ longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)), li=i*LFACTOR2+ (ulonglong) j*100 + k; return (longlong) li; -overflow: /* *endptr is set here */ +overflow: /* *endptr is set here */ *error= MY_ERRNO_ERANGE; return negative ? 
LONGLONG_MIN : (longlong) ULONGLONG_MAX; @@ -1303,6 +899,2242 @@ no_conv: } +static size_t +my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)), + const char *str, const char *end, int sequence_type) +{ + const char *str0= str; + end--; /* for easier loop condition, because of two bytes per character */ + + switch (sequence_type) + { + case MY_SEQ_SPACES: + for ( ; str < end; str+= 2) + { + if (str[0] != '\0' || str[1] != ' ') + break; + } + return (size_t) (str - str0); + default: + return 0; + } +} + + +static void +my_fill_mb2(CHARSET_INFO *cs __attribute__((unused)), + char *s, size_t l, int fill) +{ + for ( ; l >= 2; s[0]= 0, s[1]= fill, s+= 2, l-= 2); +} + + +static int +my_vsnprintf_mb2(char *dst, size_t n, const char* fmt, va_list ap) +{ + char *start=dst, *end= dst + n - 1; + for (; *fmt ; fmt++) + { + if (fmt[0] != '%') + { + if (dst == end) /* End of buffer */ + break; + + *dst++='\0'; + *dst++= *fmt; /* Copy ordinary char */ + continue; + } + + fmt++; + + /* Skip if max size is used (to be compatible with printf) */ + while ( (*fmt >= '0' && *fmt <= '9') || *fmt == '.' 
|| *fmt == '-') + fmt++; + + if (*fmt == 'l') + fmt++; + + if (*fmt == 's') /* String parameter */ + { + char *par= va_arg(ap, char *); + size_t plen; + size_t left_len= (size_t)(end-dst); + if (!par) + par= (char*) "(null)"; + plen= strlen(par); + if (left_len <= plen * 2) + plen = left_len / 2 - 1; + + for ( ; plen ; plen--, dst+=2, par++) + { + dst[0]= '\0'; + dst[1]= par[0]; + } + continue; + } + else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */ + { + int iarg; + char nbuf[16]; + char *pbuf= nbuf; + + if ((size_t) (end - dst) < 32) + break; + iarg= va_arg(ap, int); + if (*fmt == 'd') + int10_to_str((long) iarg, nbuf, -10); + else + int10_to_str((long) (uint) iarg, nbuf,10); + + for (; pbuf[0]; pbuf++) + { + *dst++= '\0'; + *dst++= *pbuf; + } + continue; + } + + /* We come here on '%%', unknown code or too long parameter */ + if (dst == end) + break; + *dst++= '\0'; + *dst++= '%'; /* % used as % or unknown code */ + } + + DBUG_ASSERT(dst <= end); + *dst='\0'; /* End of errmessage */ + return (size_t) (dst - start); +} + + +static size_t +my_snprintf_mb2(CHARSET_INFO *cs __attribute__((unused)), + char* to, size_t n, const char* fmt, ...) 
+{ + va_list args; + va_start(args,fmt); + return my_vsnprintf_mb2(to, n, fmt, args); +} + + +static size_t +my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)), + const char *ptr, size_t length) +{ + const char *end= ptr + length; + while (end > ptr + 1 && end[-1] == ' ' && end[-2] == '\0') + end-= 2; + return (size_t) (end - ptr); +} + +#endif /* HAVE_CHARSET_mb2*/ + + + + +#ifdef HAVE_CHARSET_utf16 + +/* + D800..DB7F - Non-provate surrogate high (896 pages) + DB80..DBFF - Private surrogate high (128 pages) + DC00..DFFF - Surrogate low (1024 codes in a page) +*/ + +#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8) +#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC) +#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800) + +static int +my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t *pwc, const uchar *s, const uchar *e) +{ + if (s + 2 > e) + return MY_CS_TOOSMALL2; + + /* + High bytes: 0xD[89AB] = B'110110??' + Low bytes: 0xD[CDEF] = B'110111??' + Surrogate mask: 0xFC = B'11111100' + */ + + if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */ + { + if (s + 4 > e) + return MY_CS_TOOSMALL4; + + if (!MY_UTF16_LOW_HEAD(s[2])) /* Broken surrigate pair */ + return MY_CS_ILSEQ; + + /* + s[0]= 110110?? (<< 18) + s[1]= ???????? (<< 10) + s[2]= 110111?? (<< 8) + s[3]= ???????? 
(<< 0) + */ + + *pwc= ((s[0] & 3) << 18) + (s[1] << 10) + + ((s[2] & 3) << 8) + s[3] + 0x10000; + + return 4; + } + + if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */ + return MY_CS_ILSEQ; + + *pwc= (s[0] << 8) + s[1]; + return 2; +} + + +static int +my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *s, uchar *e) +{ + if (wc <= 0xFFFF) + { + if (s + 2 > e) + return MY_CS_TOOSMALL2; + if (MY_UTF16_SURROGATE(wc)) + return MY_CS_ILUNI; + *s++= (uchar) (wc >> 8); + *s= (uchar) (wc & 0xFF); + return 2; + } + + if (wc <= 0x10FFFF) + { + if (s + 4 > e) + return MY_CS_TOOSMALL4; + *s++= (uchar) ((wc-= 0x10000) >> 18) | 0xD8; + *s++= (uchar) (wc >> 10) & 0xFF; + *s++= (uchar) ((wc >> 8) & 3) | 0xDC; + *s= (uchar) wc & 0xFF; + return 4; + } + + return MY_CS_ILUNI; +} + + +static inline void +my_tolower_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256 && uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].tolower; +} + + +static inline void +my_toupper_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256 && uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].toupper; +} + + +static inline void +my_tosort_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256) + { + if (uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].sort; + } + else + { + *wc= REPLACEMENT_CHAR; + } +} + + +static size_t +my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst __attribute__((unused)), + size_t dstlen __attribute__((unused))) +{ + my_wc_t wc; + int res; + char *srcend= src + srclen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src == dst && srclen == dstlen); + + while ((src < srcend) && + (res= my_utf16_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0) + { + my_toupper_utf16(uni_plane, &wc); + if (res != my_uni_utf16(cs, wc, (uchar*) src, (uchar*) srcend)) + break; + src+= res; + } + return srclen; +} + 
+ +static void +my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *n1, ulong *n2) +{ + my_wc_t wc; + int res; + const uchar *e= s+slen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + + while (e > s + 1 && e[-1] == ' ' && e[-2] == '\0') + e-= 2; + + while ((s < e) && (res= my_utf16_uni(cs, &wc, (uchar *)s, (uchar*)e)) > 0) + { + my_tosort_utf16(uni_plane, &wc); + n1[0]^= (((n1[0] & 63) + n2[0]) * (wc & 0xFF)) + (n1[0] << 8); + n2[0]+= 3; + n1[0]^= (((n1[0] & 63) + n2[0]) * (wc >> 8)) + (n1[0] << 8); + n2[0]+= 3; + s+= res; + } +} + + +static size_t +my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst __attribute__((unused)), + size_t dstlen __attribute__((unused))) +{ + my_wc_t wc; + int res; + char *srcend= src + srclen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src == dst && srclen == dstlen); + + while ((src < srcend) && + (res= my_utf16_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0) + { + my_tolower_utf16(uni_plane, &wc); + if (res != my_uni_utf16(cs, wc, (uchar*) src, (uchar*) srcend)) + break; + src+= res; + } + return srclen; +} + + +static int +my_strnncoll_utf16(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) +{ + int s_res, t_res; + my_wc_t s_wc,t_wc; + const uchar *se= s + slen; + const uchar *te= t + tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + while (s < se && t < te) + { + s_res= my_utf16_uni(cs, &s_wc, s, se); + t_res= my_utf16_uni(cs, &t_wc, t, te); + + if (s_res <= 0 || t_res <= 0) + { + /* Incorrect string, compare by char value */ + return my_bincmp(s, se, t, te); + } + + my_tosort_utf16(uni_plane, &s_wc); + my_tosort_utf16(uni_plane, &t_wc); + + if (s_wc != t_wc) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + return (int) (t_is_prefix ? 
(t - te) : ((se - s) - (te - t))); +} + + +/** + Compare strings, discarding end space + + If one string is shorter as the other, then we space extend the other + so that the strings have equal length. + + This will ensure that the following things hold: + + "a" == "a " + "a\0" < "a" + "a\0" < "a " + + @param cs Character set pinter. + @param a First string to compare. + @param a_length Length of 'a'. + @param b Second string to compare. + @param b_length Length of 'b'. + + IMPLEMENTATION + + @return Comparison result. + @retval Negative number, if a less than b. + @retval 0, if a is equal to b + @retval Positive number, if a > b +*/ + +static int +my_strnncollsp_utf16(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool diff_if_only_endspace_difference) +{ + int res; + my_wc_t s_wc, t_wc; + const uchar *se= s + slen, *te= t + tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + DBUG_ASSERT((slen % 2) == 0); + DBUG_ASSERT((tlen % 2) == 0); + +#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE + diff_if_only_endspace_difference= FALSE; +#endif + + while (s < se && t < te) + { + int s_res= my_utf16_uni(cs, &s_wc, s, se); + int t_res= my_utf16_uni(cs, &t_wc, t, te); + + if (s_res <= 0 || t_res <= 0) + { + /* Incorrect string, compare bytewise */ + return my_bincmp(s, se, t, te); + } + + my_tosort_utf16(uni_plane, &s_wc); + my_tosort_utf16(uni_plane, &t_wc); + + if (s_wc != t_wc) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + + slen= (size_t) (se - s); + tlen= (size_t) (te - t); + res= 0; + + if (slen != tlen) + { + int s_res, swap= 1; + if (diff_if_only_endspace_difference) + res= 1; /* Assume 's' is bigger */ + if (slen < tlen) + { + slen= tlen; + s= t; + se= te; + swap= -1; + res= -res; + } + + for ( ; s < se; s+= s_res) + { + if ((s_res= my_utf16_uni(cs, &s_wc, s, se)) < 0) + { + DBUG_ASSERT(0); + return 0; + } + if (s_wc != ' ') + return (s_wc < ' ') ? 
-swap : swap; + } + } + return res; +} + + +static uint +my_ismbchar_utf16(CHARSET_INFO *cs __attribute__((unused)), + const char *b __attribute__((unused)), + const char *e __attribute__((unused))) +{ + if (b + 2 > e) + return 0; + + if (MY_UTF16_HIGH_HEAD(*b)) + { + return (b + 4 <= e) && MY_UTF16_LOW_HEAD(b[2]) ? 4 : 0; + } + + if (MY_UTF16_LOW_HEAD(*b)) + return 0; + + return 2; +} + + +static uint +my_mbcharlen_utf16(CHARSET_INFO *cs __attribute__((unused)), + uint c __attribute__((unused))) +{ + return MY_UTF16_HIGH_HEAD(c) ? 4 : 2; +} + + +static size_t +my_numchars_utf16(CHARSET_INFO *cs, + const char *b, const char *e) +{ + size_t nchars= 0; + for ( ; ; nchars++) + { + size_t charlen= my_ismbchar_utf16(cs, b, e); + if (!charlen) + break; + b+= charlen; + } + return nchars; +} + + +static size_t +my_charpos_utf16(CHARSET_INFO *cs, + const char *b, const char *e, size_t pos) +{ + const char *b0= b; + uint charlen; + + for ( ; pos; b+= charlen, pos--) + { + if (!(charlen= my_ismbchar(cs, b, e))) + return (e + 2 - b0); /* Error, return pos outside the string */ + } + return (size_t) (pos ? (e + 2 - b0) : (b - b0)); +} + + +static size_t +my_well_formed_len_utf16(CHARSET_INFO *cs, + const char *b, const char *e, + size_t nchars, int *error) +{ + const char *b0= b; + uint charlen; + *error= 0; + + for ( ; nchars; b+= charlen, nchars--) + { + if (!(charlen= my_ismbchar(cs, b, e))) + { + *error= b < e ? 
1 : 0; + break; + } + } + return (size_t) (b - b0); +} + + +static int +my_wildcmp_utf16_ci(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, uni_plane); +} + + +static int +my_wildcmp_utf16_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, NULL); +} + + +static int +my_strnncoll_utf16_bin(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) +{ + int s_res,t_res; + my_wc_t s_wc,t_wc; + const uchar *se=s+slen; + const uchar *te=t+tlen; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + while ( s < se && t < te ) + { + s_res= my_utf16_uni(cs,&s_wc, s, se); + t_res= my_utf16_uni(cs,&t_wc, t, te); + + if (s_res <= 0 || t_res <= 0) + { + /* Incorrect string, compare by char value */ + return my_bincmp(s, se, t, te); + } + if (s_wc != t_wc) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + return (int) (t_is_prefix ? 
(t - te) : ((se - s) - (te - t))); +} + + +static int +my_strnncollsp_utf16_bin(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool diff_if_only_endspace_difference) +{ + int res; + my_wc_t s_wc, t_wc; + const uchar *se= s + slen, *te= t + tlen; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + DBUG_ASSERT((slen % 2) == 0); + DBUG_ASSERT((tlen % 2) == 0); + +#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE + diff_if_only_endspace_difference= FALSE; +#endif + + while (s < se && t < te) + { + int s_res= my_utf16_uni(cs, &s_wc, s, se); + int t_res= my_utf16_uni(cs, &t_wc, t, te); + + if (s_res <= 0 || t_res <= 0) + { + /* Incorrect string, compare bytewise */ + return my_bincmp(s, se, t, te); + } + + if (s_wc != t_wc) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + + slen= (size_t) (se - s); + tlen= (size_t) (te - t); + res= 0; + + if (slen != tlen) + { + int s_res, swap= 1; + if (diff_if_only_endspace_difference) + res= 1; /* Assume 's' is bigger */ + if (slen < tlen) + { + slen= tlen; + s= t; + se= te; + swap= -1; + res= -res; + } + + for ( ; s < se; s+= s_res) + { + if ((s_res= my_utf16_uni(cs, &s_wc, s, se)) < 0) + { + DBUG_ASSERT(0); + return 0; + } + if (s_wc != ' ') + return (s_wc < ' ') ? -swap : swap; + } + } + return res; +} + + +static void +my_hash_sort_utf16_bin(CHARSET_INFO *cs __attribute__((unused)), + const uchar *key, size_t len,ulong *nr1, ulong *nr2) +{ + const uchar *pos = key; + + key+= len; + + while (key > pos + 1 && key[-1] == ' ' && key[-2] == '\0') + key-= 2; + + for (; pos < (uchar*) key ; pos++) + { + nr1[0]^= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) * + ((uint)*pos)) + (nr1[0] << 8); + nr2[0]+= 3; + } +} + + +/** + Calculate min_str and max_str that ranges a LIKE string. + + @param ptr Pointer to LIKE pattern. + @param ptr_length Length of LIKE pattern. + @param escape Escape character in LIKE. (Normally '\'). 
+ All escape characters should be removed + from min_str and max_str. + @param res_length Length of min_str and max_str. + @param min_str Smallest case sensitive string that ranges LIKE. + Should be space padded to res_length. + @param max_str Largest case sensitive string that ranges LIKE. + Normally padded with the biggest character sort value. + + @return Optimization status. + @retval FALSE if LIKE pattern can be optimized + @rerval TRUE if LIKE can't be optimized. +*/ + +my_bool +my_like_range_utf16(CHARSET_INFO *cs, + const char *ptr, size_t ptr_length, + pbool escape, pbool w_one, pbool w_many, + size_t res_length, + char *min_str,char *max_str, + size_t *min_length,size_t *max_length) +{ + const char *end=ptr+ptr_length; + char *min_org=min_str; + char *min_end=min_str+res_length; + size_t charlen= res_length / cs->mbmaxlen; + + for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0 + ; ptr+=2, charlen--) + { + if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end) + { + ptr+=2; /* Skip escape */ + *min_str++= *max_str++ = ptr[0]; + *min_str++= *max_str++ = ptr[1]; + continue; + } + if (ptr[0] == '\0' && ptr[1] == w_one) /* '_' in SQL */ + { + *min_str++= (char) (cs->min_sort_char >> 8); + *min_str++= (char) (cs->min_sort_char & 255); + *max_str++= (char) (cs->max_sort_char >> 8); + *max_str++= (char) (cs->max_sort_char & 255); + continue; + } + if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */ + { + /* + Calculate length of keys: + 'a\0\0... is the smallest possible string when we have space expand + a\ff\ff... is the biggest possible string + */ + *min_length= ((cs->state & MY_CS_BINSORT) ? 
(size_t) (min_str - min_org) : + res_length); + *max_length= res_length; + do { + *min_str++ = 0; + *min_str++ = 0; + *max_str++ = (char) (cs->max_sort_char >> 8); + *max_str++ = (char) (cs->max_sort_char & 255); + } while (min_str + 1 < min_end); + return FALSE; + } + *min_str++= *max_str++ = ptr[0]; + *min_str++= *max_str++ = ptr[1]; + } + + /* Temporary fix for handling w_one at end of string (key compression) */ + { + char *tmp; + for (tmp= min_str ; tmp-1 > min_org && tmp[-1] == '\0' && tmp[-2]=='\0';) + { + *--tmp=' '; + *--tmp='\0'; + } + } + + *min_length= *max_length = (size_t) (min_str - min_org); + while (min_str + 1 < min_end) + { + *min_str++ = *max_str++ = '\0'; + *min_str++ = *max_str++ = ' '; /* Because if key compression */ + } + return FALSE; +} + + +static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler = +{ + NULL, /* init */ + my_strnncoll_utf16, + my_strnncollsp_utf16, + my_strnxfrm_unicode, + my_strnxfrmlen_simple, + my_like_range_utf16, + my_wildcmp_utf16_ci, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16, + my_propagate_simple +}; + + +static MY_COLLATION_HANDLER my_collation_utf16_bin_handler = +{ + NULL, /* init */ + my_strnncoll_utf16_bin, + my_strnncollsp_utf16_bin, + my_strnxfrm_unicode, + my_strnxfrmlen_simple, + my_like_range_utf16, + my_wildcmp_utf16_bin, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16_bin, + my_propagate_simple +}; + + +MY_CHARSET_HANDLER my_charset_utf16_handler= +{ + NULL, /* init */ + my_ismbchar_utf16, /* ismbchar */ + my_mbcharlen_utf16, /* mbcharlen */ + my_numchars_utf16, + my_charpos_utf16, + my_well_formed_len_utf16, + my_lengthsp_mb2, + my_numcells_mb, + my_utf16_uni, /* mb_wc */ + my_uni_utf16, /* wc_mb */ + my_mb_ctype_mb, + my_caseup_str_mb2_or_mb4, + my_casedn_str_mb2_or_mb4, + my_caseup_utf16, + my_casedn_utf16, + my_snprintf_mb2, + my_l10tostr_mb2_or_mb4, + my_ll10tostr_mb2_or_mb4, + my_fill_mb2, + my_strntol_mb2_or_mb4, + my_strntoul_mb2_or_mb4, + 
my_strntoll_mb2_or_mb4, + my_strntoull_mb2_or_mb4, + my_strntod_mb2_or_mb4, + my_strtoll10_mb2, + my_strntoull10rnd_mb2_or_mb4, + my_scan_mb2 +}; + + +CHARSET_INFO my_charset_utf16_general_ci= +{ + 54,0,0, /* number */ + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, + "utf16", /* cs name */ + "utf16_general_ci", /* name */ + "UTF-16 Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_general_ci_handler +}; + + +CHARSET_INFO my_charset_utf16_bin= +{ + 55,0,0, /* number */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII, + "utf16", /* cs name */ + "utf16_bin", /* name */ + "UTF-16 Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf16_handler, + &my_collation_utf16_bin_handler +}; + +#endif /* HAVE_CHARSET_utf16 */ + + +#ifdef HAVE_CHARSET_utf32 + +static int +my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t *pwc, 
const uchar *s, const uchar *e) +{ + if (s + 4 > e) + return MY_CS_TOOSMALL4; + *pwc= (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + (s[3]); + return 4; +} + + +static int +my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *s, uchar *e) +{ + if (s + 4 > e) + return MY_CS_TOOSMALL4; + + s[0]= (uchar) (wc >> 24); + s[1]= (uchar) (wc >> 16) & 0xFF; + s[2]= (uchar) (wc >> 8) & 0xFF; + s[3]= (uchar) wc & 0xFF; + return 4; +} + + +static inline void +my_tolower_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256 && uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].tolower; +} + + +static inline void +my_toupper_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256 && uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].toupper; +} + + +static inline void +my_tosort_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256) + { + if (uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].sort; + } + else + { + *wc= REPLACEMENT_CHAR; + } +} + + +static size_t +my_caseup_utf32(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst __attribute__((unused)), + size_t dstlen __attribute__((unused))) +{ + my_wc_t wc; + int res; + char *srcend= src + srclen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src == dst && srclen == dstlen); + + while ((src < srcend) && + (res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0) + { + my_toupper_utf32(uni_plane, &wc); + if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend)) + break; + src+= res; + } + return srclen; +} + + +static inline void +my_hash_add(ulong *n1, ulong *n2, uint ch) +{ + n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8); + n2[0]+= 3; +} + + +static void +my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *n1, ulong *n2) +{ + my_wc_t wc; + int res; + const uchar *e= s + slen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + + /* Skip 
trailing spaces */ + while (e > s + 3 && e[-1] == ' ' && !e[-2] && !e[-3] && !e[-4]) + e-= 4; + + while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0) + { + my_tosort_utf32(uni_plane, &wc); + my_hash_add(n1, n2, (uint) (wc >> 24)); + my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF); + my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF); + my_hash_add(n1, n2, (uint) (wc & 0xFF)); + s+= res; + } +} + + +static size_t +my_casedn_utf32(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst __attribute__((unused)), + size_t dstlen __attribute__((unused))) +{ + my_wc_t wc; + int res; + char *srcend= src + srclen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src == dst && srclen == dstlen); + + while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0) + { + my_tolower_utf32(uni_plane,&wc); + if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend)) + break; + src+= res; + } + return srclen; +} + + +static int +my_strnncoll_utf32(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) +{ + my_wc_t s_wc,t_wc; + const uchar *se= s + slen; + const uchar *te= t + tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + while (s < se && t < te) + { + int s_res= my_utf32_uni(cs, &s_wc, s, se); + int t_res= my_utf32_uni(cs, &t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0) + { + /* Incorrect string, compare by char value */ + return my_bincmp(s, se, t, te); + } + + my_tosort_utf32(uni_plane, &s_wc); + my_tosort_utf32(uni_plane, &t_wc); + + if (s_wc != t_wc) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t))); +} + + +/** + Compare strings, discarding end space + + If one string is shorter as the other, then we space extend the other + so that the strings have equal length. 
+ + This will ensure that the following things hold: + + "a" == "a " + "a\0" < "a" + "a\0" < "a " + + @param cs Character set pinter. + @param a First string to compare. + @param a_length Length of 'a'. + @param b Second string to compare. + @param b_length Length of 'b'. + + IMPLEMENTATION + + @return Comparison result. + @retval Negative number, if a less than b. + @retval 0, if a is equal to b + @retval Positive number, if a > b +*/ + + +static int +my_strnncollsp_utf32(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool diff_if_only_endspace_difference) +{ + int res; + my_wc_t s_wc, t_wc; + const uchar *se= s + slen, *te= t + tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + DBUG_ASSERT((slen % 4) == 0); + DBUG_ASSERT((tlen % 4) == 0); + +#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE + diff_if_only_endspace_difference= FALSE; +#endif + + while ( s < se && t < te ) + { + int s_res= my_utf32_uni(cs, &s_wc, s, se); + int t_res= my_utf32_uni(cs, &t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare bytewise */ + return my_bincmp(s, se, t, te); + } + + my_tosort_utf32(uni_plane, &s_wc); + my_tosort_utf32(uni_plane, &t_wc); + + if ( s_wc != t_wc ) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + + slen= (size_t) (se - s); + tlen= (size_t) (te - t); + res= 0; + + if (slen != tlen) + { + int s_res, swap= 1; + if (diff_if_only_endspace_difference) + res= 1; /* Assume 's' is bigger */ + if (slen < tlen) + { + slen= tlen; + s= t; + se= te; + swap= -1; + res= -res; + } + + for ( ; s < se; s+= s_res) + { + if ((s_res= my_utf32_uni(cs, &s_wc, s, se)) < 0) + { + DBUG_ASSERT(0); + return 0; + } + if (s_wc != ' ') + return (s_wc < ' ') ? 
-swap : swap; + } + } + return res; +} + + +static size_t +my_strnxfrmlen_utf32(CHARSET_INFO *cs __attribute__((unused)), size_t len) +{ + return len / 2; +} + + +static uint +my_ismbchar_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *b __attribute__((unused)), + const char *e __attribute__((unused))) +{ + return 4; +} + + +static uint +my_mbcharlen_utf32(CHARSET_INFO *cs __attribute__((unused)) , + uint c __attribute__((unused))) +{ + return 4; +} + + +static int +my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap) +{ + char *start= dst, *end= dst + n; + DBUG_ASSERT((n % 4) == 0); + for (; *fmt ; fmt++) + { + if (fmt[0] != '%') + { + if (dst >= end) /* End of buffer */ + break; + + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; + *dst++= *fmt; /* Copy ordinary char */ + continue; + } + + fmt++; + + /* Skip if max size is used (to be compatible with printf) */ + while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-') + fmt++; + + if (*fmt == 'l') + fmt++; + + if (*fmt == 's') /* String parameter */ + { + reg2 char *par= va_arg(ap, char *); + size_t plen; + size_t left_len= (size_t)(end - dst); + if (!par) par= (char*)"(null)"; + plen= strlen(par); + if (left_len <= plen*4) + plen= left_len / 4 - 1; + + for ( ; plen ; plen--, dst+= 4, par++) + { + dst[0]= '\0'; + dst[1]= '\0'; + dst[2]= '\0'; + dst[3]= par[0]; + } + continue; + } + else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */ + { + register int iarg; + char nbuf[16]; + char *pbuf= nbuf; + + if ((size_t) (end - dst) < 64) + break; + iarg= va_arg(ap, int); + if (*fmt == 'd') + int10_to_str((long) iarg, nbuf, -10); + else + int10_to_str((long) (uint) iarg,nbuf,10); + + for (; pbuf[0]; pbuf++) + { + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; + *dst++= *pbuf; + } + continue; + } + + /* We come here on '%%', unknown code or too long parameter */ + if (dst == end) + break; + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; + *dst++= '%'; /* % used as % or unknown code 
*/ + } + + DBUG_ASSERT(dst < end); + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; /* End of errmessage */ + return (size_t) (dst - start - 4); +} + + +static size_t +my_snprintf_utf32(CHARSET_INFO *cs __attribute__((unused)), + char* to, size_t n, const char* fmt, ...) +{ + va_list args; + va_start(args,fmt); + return my_vsnprintf_utf32(to, n, fmt, args); +} + + +static longlong +my_strtoll10_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *nptr, char **endptr, int *error) +{ + const char *s, *end, *start, *n_end, *true_end; + uchar c; + unsigned long i, j, k; + ulonglong li; + int negative; + ulong cutoff, cutoff2, cutoff3; + + s= nptr; + /* If fixed length string */ + if (endptr) + { + /* Make sure string length is even */ + end= s + ((*endptr - s) / 4) * 4; + while (s < end && !s[0] && !s[1] && !s[2] && + (s[3] == ' ' || s[3] == '\t')) + s+= 4; + if (s == end) + goto no_conv; + } + else + { + /* We don't support null terminated strings in UCS2 */ + goto no_conv; + } + + /* Check for a sign. 
*/ + negative= 0; + if (!s[0] && !s[1] && !s[2] && s[3] == '-') + { + *error= -1; /* Mark as negative number */ + negative= 1; + s+= 4; + if (s == end) + goto no_conv; + cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2; + cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100; + cutoff3= MAX_NEGATIVE_NUMBER % 100; + } + else + { + *error= 0; + if (!s[0] && !s[1] && !s[2] && s[3] == '+') + { + s+= 4; + if (s == end) + goto no_conv; + } + cutoff= ULONGLONG_MAX / LFACTOR2; + cutoff2= ULONGLONG_MAX % LFACTOR2 / 100; + cutoff3= ULONGLONG_MAX % 100; + } + + /* Handle case where we have a lot of pre-zero */ + if (!s[0] && !s[1] && !s[2] && s[3] == '0') + { + i= 0; + do + { + s+= 4; + if (s == end) + goto end_i; /* Return 0 */ + } + while (!s[0] && !s[1] && !s[2] && s[3] == '0'); + n_end= s + 4 * INIT_CNT; + } + else + { + /* Read first digit to check that it's a valid number */ + if (s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9) + goto no_conv; + i= c; + s+= 4; + n_end= s + 4 * (INIT_CNT-1); + } + + /* Handle first 9 digits and store them in i */ + if (n_end > end) + n_end= end; + for (; s != n_end ; s+= 4) + { + if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9) + goto end_i; + i= i * 10 + c; + } + if (s == end) + goto end_i; + + /* Handle next 9 digits and store them in j */ + j= 0; + start= s; /* Used to know how much to shift i */ + n_end= true_end= s + 4 * INIT_CNT; + if (n_end > end) + n_end= end; + do + { + if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9) + goto end_i_and_j; + j= j * 10 + c; + s+= 4; + } while (s != n_end); + if (s == end) + { + if (s != true_end) + goto end_i_and_j; + goto end3; + } + if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9) + goto end3; + + /* Handle the next 1 or 2 digits and store them in k */ + k=c; + s+= 4; + if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9) + goto end4; + k= k * 10 + c; + s+= 2; + *endptr= (char*) s; + + /* number string should have ended here */ + if (s != end && !s[0] && !s[1] && !s[2] && (c= (s[3] - '0')) <= 9) + 
goto overflow; + + /* Check that we didn't get an overflow with the last digit */ + if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) && + k > cutoff3))) + goto overflow; + li= i * LFACTOR2+ (ulonglong) j * 100 + k; + return (longlong) li; + +overflow: /* *endptr is set here */ + *error= MY_ERRNO_ERANGE; + return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX; + +end_i: + *endptr= (char*) s; + return (negative ? ((longlong) -(long) i) : (longlong) i); + +end_i_and_j: + li= (ulonglong) i * lfactor[(size_t) (s-start) / 4] + j; + *endptr= (char*) s; + return (negative ? -((longlong) li) : (longlong) li); + +end3: + li= (ulonglong) i*LFACTOR+ (ulonglong) j; + *endptr= (char*) s; + return (negative ? -((longlong) li) : (longlong) li); + +end4: + li= (ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k; + *endptr= (char*) s; + if (negative) + { + if (li > MAX_NEGATIVE_NUMBER) + goto overflow; + return -((longlong) li); + } + return (longlong) li; + +no_conv: + /* There was no number to convert. */ + *error= MY_ERRNO_EDOM; + *endptr= (char *) nptr; + return 0; +} + + +static size_t +my_numchars_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e) +{ + return (size_t) (e - b) / 4; +} + + +static size_t +my_charpos_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, size_t pos) +{ + size_t string_length= (size_t) (e - b); + return pos * 4 > string_length ? 
string_length + 4 : pos * 4; +} + + +static size_t +my_well_formed_len_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, + size_t nchars, int *error) +{ + /* Ensure string length is divisible by 4 */ + const char *b0= b; + size_t length= e - b; + DBUG_ASSERT((length % 4) == 0); + *error= 0; + nchars*= 4; + if (length > nchars) + { + length= nchars; + e= b + nchars; + } + for (; b < e; b+= 4) + { + /* Don't accept characters greater than U+10FFFF */ + if (b[0] || (uchar) b[1] > 0x10) + { + *error= 1; + return b - b0; + } + } + return length; +} + + +static +void my_fill_utf32(CHARSET_INFO *cs, + char *s, size_t slen, int fill) +{ + char buf[10]; + uint buflen; + char *e= s + slen; + + DBUG_ASSERT((slen % 4) == 0); + + buflen= cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) buf, + (uchar*) buf + sizeof(buf)); + DBUG_ASSERT(buflen == 4); + while (s < e) + { + memcpy(s, buf, 4); + s+= 4; + } +} + + +static size_t +my_lengthsp_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *ptr, size_t length) +{ + const char *end= ptr + length; + DBUG_ASSERT((length % 4) == 0); + while (end > ptr + 3 && end[-1] == ' ' && !end[-2] && !end[-3] && !end[-4]) + end-= 4; + return (size_t) (end - ptr); +} + + +static int +my_wildcmp_utf32_ci(CHARSET_INFO *cs, + const char *str, const char *str_end, + const char *wildstr, const char *wildend, + int escape, int w_one, int w_many) +{ + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, uni_plane); +} + + +static int +my_wildcmp_utf32_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, NULL); +} + + +static int +my_strnncoll_utf32_bin(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) +{ + my_wc_t 
s_wc, t_wc; + const uchar *se= s + slen; + const uchar *te= t + tlen; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + while (s < se && t < te) + { + int s_res= my_utf32_uni(cs, &s_wc, s, se); + int t_res= my_utf32_uni(cs, &t_wc, t, te); + + if (s_res <= 0 || t_res <= 0) + { + /* Incorrect string, compare by char value */ + return my_bincmp(s, se, t, te); + } + if (s_wc != t_wc) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + return (int) (t_is_prefix ? (t-te) : ((se - s) - (te - t))); +} + + +static inline my_wc_t +my_utf32_get(const uchar *s) +{ + return + ((my_wc_t) s[0] << 24) + + ((my_wc_t) s[1] << 16) + + ((my_wc_t) s[2] << 8) + + s[3]; +} + + +static int +my_strnncollsp_utf32_bin(CHARSET_INFO *cs __attribute__((unused)), + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool diff_if_only_endspace_difference + __attribute__((unused))) +{ + const uchar *se, *te; + size_t minlen; + + DBUG_ASSERT((slen % 4) == 0); + DBUG_ASSERT((tlen % 4) == 0); + + se= s + slen; + te= t + tlen; + + for (minlen= min(slen, tlen); minlen; minlen-= 4) + { + my_wc_t s_wc= my_utf32_get(s); + my_wc_t t_wc= my_utf32_get(t); + if (s_wc != t_wc) + return s_wc > t_wc ? 1 : -1; + + s+= 4; + t+= 4; + } + + if (slen != tlen) + { + int swap= 1; + if (slen < tlen) + { + s= t; + se= te; + swap= -1; + } + + for ( ; s < se ; s+= 4) + { + my_wc_t s_wc= my_utf32_get(s); + if (s_wc != ' ') + return (s_wc < ' ') ? -swap : swap; + } + } + return 0; +} + + +/** + Calculate min_str and max_str that ranges a LIKE string. + + @param ptr Pointer to LIKE pattern. + @param ptr_length Length of LIKE pattern. + @param escape Escape character in LIKE. (Normally '\'). + All escape characters should be removed + from min_str and max_str. + @param res_length Length of min_str and max_str. + @param min_str Smallest case sensitive string that ranges LIKE. + Should be space padded to res_length. + @param max_str Largest case sensitive string that ranges LIKE. 
+ Normally padded with the biggest character sort value. + + @return Optimization status. + @retval FALSE if LIKE pattern can be optimized + @rerval TRUE if LIKE can't be optimized. +*/ + +my_bool +my_like_range_utf32(CHARSET_INFO *cs, + const char *ptr, size_t ptr_length, + pbool escape, pbool w_one, pbool w_many, + size_t res_length, + char *min_str,char *max_str, + size_t *min_length,size_t *max_length) +{ + const char *end= ptr + ptr_length; + char *min_org= min_str; + char *min_end= min_str + res_length; + char *max_end= max_str + res_length; + size_t charlen= res_length / cs->mbmaxlen; + + DBUG_ASSERT((res_length % 4) == 0); + + for ( ; charlen > 0; ptr+= 4, charlen--) + { + my_wc_t wc; + int res; + if ((res= my_utf32_uni(cs, &wc, ptr, end)) < 0) + { + my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char); + my_fill_utf32(cs, max_str, min_end - min_str, cs->max_sort_char); + /* min_length and max_legnth are not important */ + return TRUE; + } + + if (wc == (my_wc_t) escape) + { + ptr+= 4; /* Skip escape */ + if ((res= my_utf32_uni(cs, &wc, ptr, end)) < 0) + { + my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char); + my_fill_utf32(cs, max_str, max_end - min_str, cs->max_sort_char); + /* min_length and max_length are not important */ + return TRUE; + } + if (my_uni_utf32(cs, wc, min_str, min_end) != 4 || + my_uni_utf32(cs, wc, max_str, max_end) != 4) + goto pad_set_lengths; + *min_str++= 4; + *max_str++= 4; + continue; + } + + if (wc == (my_wc_t) w_one) + { + if (my_uni_utf32(cs, cs->min_sort_char, min_str, min_end) != 4 || + my_uni_utf32(cs, cs->max_sort_char, max_str, max_end) != 4) + goto pad_set_lengths; + min_str+= 4; + max_str+= 4; + continue; + } + + if (wc == (my_wc_t) w_many) + { + /* + Calculate length of keys: + 'a\0\0... is the smallest possible string when we have space expand + a\ff\ff... is the biggest possible string + */ + *min_length= ((cs->state & MY_CS_BINSORT) ? 
+ (size_t) (min_str - min_org) : + res_length); + *max_length= res_length; + goto pad_min_max; + } + + /* Normal character */ + if (my_uni_utf32(cs, wc, min_str, min_end) != 4 || + my_uni_utf32(cs, wc, max_str, max_end) != 4) + goto pad_set_lengths; + min_str+= 4; + max_str+= 4; + } + +pad_set_lengths: + *min_length= *max_length= (size_t) (min_str - min_org); + +pad_min_max: + my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char); + my_fill_utf32(cs, max_str, max_end - max_str, cs->max_sort_char); + return FALSE; +} + + +static size_t +my_scan_utf32(CHARSET_INFO *cs, + const char *str, const char *end, int sequence_type) +{ + const char *str0= str; + + switch (sequence_type) + { + case MY_SEQ_SPACES: + for ( ; str < end; ) + { + my_wc_t wc; + int res= my_utf32_uni(cs, &wc, str, end); + if (res < 0 || wc != ' ') + break; + str+= res; + } + return (size_t) (str - str0); + default: + return 0; + } +} + + +static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler = +{ + NULL, /* init */ + my_strnncoll_utf32, + my_strnncollsp_utf32, + my_strnxfrm_unicode, + my_strnxfrmlen_utf32, + my_like_range_utf32, + my_wildcmp_utf32_ci, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf32, + my_propagate_simple +}; + + +static MY_COLLATION_HANDLER my_collation_utf32_bin_handler = +{ + NULL, /* init */ + my_strnncoll_utf32_bin, + my_strnncollsp_utf32_bin, + my_strnxfrm_unicode, + my_strnxfrmlen_utf32, + my_like_range_utf32, + my_wildcmp_utf32_bin, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf32, + my_propagate_simple +}; + + +MY_CHARSET_HANDLER my_charset_utf32_handler= +{ + NULL, /* init */ + my_ismbchar_utf32, + my_mbcharlen_utf32, + my_numchars_utf32, + my_charpos_utf32, + my_well_formed_len_utf32, + my_lengthsp_utf32, + my_numcells_mb, + my_utf32_uni, + my_uni_utf32, + my_mb_ctype_mb, + my_caseup_str_mb2_or_mb4, + my_casedn_str_mb2_or_mb4, + my_caseup_utf32, + my_casedn_utf32, + my_snprintf_utf32, + my_l10tostr_mb2_or_mb4, + 
my_ll10tostr_mb2_or_mb4, + my_fill_utf32, + my_strntol_mb2_or_mb4, + my_strntoul_mb2_or_mb4, + my_strntoll_mb2_or_mb4, + my_strntoull_mb2_or_mb4, + my_strntod_mb2_or_mb4, + my_strtoll10_utf32, + my_strntoull10rnd_mb2_or_mb4, + my_scan_utf32 +}; + + +CHARSET_INFO my_charset_utf32_general_ci= +{ + 60,0,0, /* number */ + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, + "utf32", /* cs name */ + "utf32_general_ci", /* name */ + "UTF-32 Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_general_ci_handler +}; + + +CHARSET_INFO my_charset_utf32_bin= +{ + 61,0,0, /* number */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII, + "utf32", /* cs name */ + "utf32_bin", /* name */ + "UTF-32 Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 4, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf32_handler, + &my_collation_utf32_bin_handler +}; + + +#endif /* HAVE_CHARSET_utf32 */ + + +#ifdef 
HAVE_CHARSET_ucs2 + +static uchar ctype_ucs2[] = { + 0, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16, + 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, + 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static uchar to_lower_ucs2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95, + 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 +}; + +static uchar to_upper_ucs2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 +}; + + +static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t * pwc, const uchar *s, const uchar *e) +{ + if (s+2 > e) /* Need 2 characters */ + return MY_CS_TOOSMALL2; + + *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]); + return 2; +} + +static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) , + my_wc_t wc, uchar *r, uchar *e) +{ + if ( r+2 > e ) + return MY_CS_TOOSMALL2; + + r[0]= (uchar) (wc >> 8); + r[1]= (uchar) (wc & 0xFF); + return 2; +} + + +static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst __attribute__((unused)), + size_t dstlen __attribute__((unused))) +{ + my_wc_t wc; + int res; + char *srcend= src + srclen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src == dst && srclen == dstlen); + + while ((src < srcend) && + (res= my_ucs2_uni(cs, &wc, 
(uchar *)src, (uchar*) srcend)) > 0) + { + int plane= (wc>>8) & 0xFF; + wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc; + if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend)) + break; + src+= res; + } + return srclen; +} + + +static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *n1, ulong *n2) +{ + my_wc_t wc; + int res; + const uchar *e=s+slen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + + while (e > s+1 && e[-1] == ' ' && e[-2] == '\0') + e-= 2; + + while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0) + { + int plane = (wc>>8) & 0xFF; + wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc; + n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8); + n2[0]+=3; + n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8); + n2[0]+=3; + s+=res; + } +} + + +static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst __attribute__((unused)), + size_t dstlen __attribute__((unused))) +{ + my_wc_t wc; + int res; + char *srcend= src + srclen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src == dst && srclen == dstlen); + + while ((src < srcend) && + (res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0) + { + int plane= (wc>>8) & 0xFF; + wc= uni_plane[plane] ? 
uni_plane[plane][wc & 0xFF].tolower : wc; + if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend)) + break; + src+= res; + } + return srclen; +} + + +static int my_strnncoll_ucs2(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) +{ + int s_res,t_res; + my_wc_t UNINIT_VAR(s_wc),t_wc; + const uchar *se=s+slen; + const uchar *te=t+tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + + while ( s < se && t < te ) + { + int plane; + s_res=my_ucs2_uni(cs,&s_wc, s, se); + t_res=my_ucs2_uni(cs,&t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare by char value */ + return ((int)s[0]-(int)t[0]); + } + + plane=(s_wc>>8) & 0xFF; + s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; + plane=(t_wc>>8) & 0xFF; + t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc; + if ( s_wc != t_wc ) + { + return s_wc > t_wc ? 1 : -1; + } + + s+=s_res; + t+=t_res; + } + return (int) (t_is_prefix ? t-te : ((se-s) - (te-t))); +} + +/* + Compare strings, discarding end space + + SYNOPSIS + my_strnncollsp_ucs2() + cs character set handler + a First string to compare + a_length Length of 'a' + b Second string to compare + b_length Length of 'b' + + IMPLEMENTATION + If one string is shorter as the other, then we space extend the other + so that the strings have equal length. 
+ + This will ensure that the following things hold: + + "a" == "a " + "a\0" < "a" + "a\0" < "a " + + RETURN + < 0 a < b + = 0 a == b + > 0 a > b +*/ + +static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool diff_if_only_endspace_difference + __attribute__((unused))) +{ + const uchar *se, *te; + size_t minlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + + /* extra safety to make sure the lengths are even numbers */ + slen&= ~1; + tlen&= ~1; + + se= s + slen; + te= t + tlen; + + for (minlen= min(slen, tlen); minlen; minlen-= 2) + { + int s_wc = uni_plane[s[0]] ? (int) uni_plane[s[0]][s[1]].sort : + (((int) s[0]) << 8) + (int) s[1]; + + int t_wc = uni_plane[t[0]] ? (int) uni_plane[t[0]][t[1]].sort : + (((int) t[0]) << 8) + (int) t[1]; + if ( s_wc != t_wc ) + return s_wc > t_wc ? 1 : -1; + + s+= 2; + t+= 2; + } + + if (slen != tlen) + { + int swap= 1; + if (slen < tlen) + { + s= t; + se= te; + swap= -1; + } + + for ( ; s < se ; s+= 2) + { + if (s[0] || s[1] != ' ') + return (s[0] == 0 && s[1] < ' ') ? 
-swap : swap; + } + } + return 0; +} + + +static uint my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), + const char *b __attribute__((unused)), + const char *e __attribute__((unused))) +{ + return 2; +} + + +static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) , + uint c __attribute__((unused))) +{ + return 2; +} + + static size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)), const char *b, const char *e) @@ -1336,25 +3168,6 @@ size_t my_well_formed_len_ucs2(CHARSET_INFO *cs __attribute__((unused)), static -void my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char *s, size_t l, int fill) -{ - for ( ; l >= 2; s[0]= 0, s[1]= fill, s+=2, l-=2); -} - - -static -size_t my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const char *ptr, size_t length) -{ - const char *end= ptr+length; - while (end > ptr+1 && end[-1] == ' ' && end[-2] == '\0') - end-=2; - return (size_t) (end-ptr); -} - - -static int my_wildcmp_ucs2_ci(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, @@ -1457,29 +3270,6 @@ static int my_strnncollsp_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), static -int my_strcasecmp_ucs2_bin(CHARSET_INFO *cs, const char *s, const char *t) -{ - size_t s_len= strlen(s); - size_t t_len= strlen(t); - size_t len = (s_len > t_len) ? 
s_len : t_len; - return my_strncasecmp_ucs2(cs, s, t, len); -} - - -static -size_t my_strnxfrm_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), - uchar *dst, size_t dstlen, - const uchar *src, size_t srclen) -{ - if (dst != src) - memcpy(dst,src,srclen= min(dstlen,srclen)); - if (dstlen > srclen) - cs->cset->fill(cs, (char*) dst + srclen, dstlen - srclen, ' '); - return dstlen; -} - - -static void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), const uchar *key, size_t len,ulong *nr1, ulong *nr2) { @@ -1613,38 +3403,16 @@ fill_max_and_min: -size_t my_scan_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const char *str, const char *end, int sequence_type) -{ - const char *str0= str; - end--; /* for easier loop condition, because of two bytes per character */ - - switch (sequence_type) - { - case MY_SEQ_SPACES: - for ( ; str < end; str+= 2) - { - if (str[0] != '\0' || str[1] != ' ') - break; - } - return (size_t) (str - str0); - default: - return 0; - } -} - - - static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = { NULL, /* init */ my_strnncoll_ucs2, my_strnncollsp_ucs2, - my_strnxfrm_ucs2, + my_strnxfrm_unicode, my_strnxfrmlen_simple, my_like_range_ucs2, my_wildcmp_ucs2_ci, - my_strcasecmp_ucs2, + my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2, my_propagate_simple @@ -1656,11 +3424,11 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = NULL, /* init */ my_strnncoll_ucs2_bin, my_strnncollsp_ucs2_bin, - my_strnxfrm_ucs2_bin, + my_strnxfrm_unicode, my_strnxfrmlen_simple, my_like_range_ucs2, my_wildcmp_ucs2_bin, - my_strcasecmp_ucs2_bin, + my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2_bin, my_propagate_simple @@ -1675,27 +3443,27 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler= my_numchars_ucs2, my_charpos_ucs2, my_well_formed_len_ucs2, - my_lengthsp_ucs2, + my_lengthsp_mb2, my_numcells_mb, my_ucs2_uni, /* mb_wc */ my_uni_ucs2, /* wc_mb */ my_mb_ctype_mb, - my_caseup_str_ucs2, - my_casedn_str_ucs2, + 
my_caseup_str_mb2_or_mb4, + my_casedn_str_mb2_or_mb4, my_caseup_ucs2, my_casedn_ucs2, - my_snprintf_ucs2, - my_l10tostr_ucs2, - my_ll10tostr_ucs2, - my_fill_ucs2, - my_strntol_ucs2, - my_strntoul_ucs2, - my_strntoll_ucs2, - my_strntoull_ucs2, - my_strntod_ucs2, - my_strtoll10_ucs2, - my_strntoull10rnd_ucs2, - my_scan_ucs2 + my_snprintf_mb2, + my_l10tostr_mb2_or_mb4, + my_ll10tostr_mb2_or_mb4, + my_fill_mb2, + my_strntol_mb2_or_mb4, + my_strntoul_mb2_or_mb4, + my_strntoll_mb2_or_mb4, + my_strntoull_mb2_or_mb4, + my_strntod_mb2_or_mb4, + my_strtoll10_mb2, + my_strntoull10rnd_mb2_or_mb4, + my_scan_mb2 }; @@ -1764,4 +3532,4 @@ CHARSET_INFO my_charset_ucs2_bin= }; -#endif +#endif /* HAVE_CHARSET_ucs2 */ diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 91f633e45ce..7de5cdd00ee 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -27,6 +27,16 @@ #define EILSEQ ENOENT #endif + +#define MY_UTF8MB3_GENERAL_CI MY_UTF8MB3 "_general_ci" +#define MY_UTF8MB3_GENERAL_CS MY_UTF8MB3 "_general_cs" +#define MY_UTF8MB3_BIN MY_UTF8MB3 "_bin" +#define MY_UTF8MB4_GENERAL_CI MY_UTF8MB4 "_general_ci" +#define MY_UTF8MB4_GENERAL_CS MY_UTF8MB4 "_general_cs" +#define MY_UTF8MB4_BIN MY_UTF8MB4 "_bin" + + + #ifndef HAVE_CHARSET_utf8 #define HAVE_CHARSET_utf8 #endif @@ -39,6 +49,14 @@ #define HAVE_UNIDATA #endif +#ifdef HAVE_CHARSET_utf16 +#define HAVE_UNIDATA +#endif + +#ifdef HAVE_CHARSET_utf32 +#define HAVE_UNIDATA +#endif + #ifdef HAVE_UNIDATA #include "my_uctype.h" @@ -1702,6 +1720,24 @@ MY_UNICASE_INFO *my_unicase_turkish[256]= }; +/* + U+FFFD REPLACEMENT CHARACTER: stored by my_tosort_unicode() for code + points whose page number is 256 or more. + Defined without a trailing semicolon so that the macro expands to a + plain expression and can be used anywhere a value is expected. +*/ +#define REPLACEMENT_CHAR 0xFFFD + + +/* + Replace *wc by its sort weight. + + The page number is *wc >> 8. For pages 0..255 the weight is read from + uni_plane[page][*wc & 0xFF].sort when that page is present; when the + page pointer is NULL *wc is left unchanged. Any code point on page 256 + or above (i.e. above U+FFFF) is replaced by REPLACEMENT_CHAR. +*/ +static inline void +my_tosort_unicode(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256) + { + if (uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].sort; + } + else + { + *wc= REPLACEMENT_CHAR; + } +} + /* ** Compare string against string with wildcard @@ -1712,13 +1748,14 @@ MY_UNICASE_INFO *my_unicase_turkish[256]= ** 1 if matched with wildcard */ -int 
my_wildcmp_unicode(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many, - MY_UNICASE_INFO **weights) +int +my_wildcmp_unicode(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many, + MY_UNICASE_INFO **weights) { - int result= -1; /* Not found, using wildcards */ + int result= -1; /* Not found, using wildcards */ my_wc_t s_wc, w_wc; int scan, plane; int (*mb_wc)(struct charset_info_st *, my_wc_t *, @@ -1734,14 +1771,14 @@ int my_wildcmp_unicode(CHARSET_INFO *cs, (const uchar*)wildend)) <= 0) return 1; - if (w_wc == (my_wc_t)w_many) + if (w_wc == (my_wc_t) w_many) { - result= 1; /* Found an anchor char */ + result= 1; /* Found an anchor char */ break; } wildstr+= scan; - if (w_wc == (my_wc_t)escape && wildstr < wildend) + if (w_wc == (my_wc_t) escape && wildstr < wildend) { if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <= 0) @@ -1755,29 +1792,27 @@ int my_wildcmp_unicode(CHARSET_INFO *cs, return 1; str+= scan; - if (!escaped && w_wc == (my_wc_t)w_one) + if (!escaped && w_wc == (my_wc_t) w_one) { - result= 1; /* Found an anchor char */ + result= 1; /* Found an anchor char */ } else { if (weights) { - plane=(s_wc>>8) & 0xFF; - s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; - plane=(w_wc>>8) & 0xFF; - w_wc = weights[plane] ? 
weights[plane][w_wc & 0xFF].sort : w_wc; + my_tosort_unicode(weights, &s_wc); + my_tosort_unicode(weights, &w_wc); } if (s_wc != w_wc) - return 1; /* No match */ + return 1; /* No match */ } if (wildstr == wildend) - return (str != str_end); /* Match if both are at end */ + return (str != str_end); /* Match if both are at end */ } - if (w_wc == (my_wc_t)w_many) - { /* Found w_many */ + if (w_wc == (my_wc_t) w_many) + { /* Found w_many */ /* Remove any '%' and '_' from the wild search string */ for ( ; wildstr != wildend ; ) @@ -1786,29 +1821,29 @@ int my_wildcmp_unicode(CHARSET_INFO *cs, (const uchar*)wildend)) <= 0) return 1; - if (w_wc == (my_wc_t)w_many) - { - wildstr+= scan; - continue; - } - - if (w_wc == (my_wc_t)w_one) - { - wildstr+= scan; + if (w_wc == (my_wc_t)w_many) + { + wildstr+= scan; + continue; + } + + if (w_wc == (my_wc_t)w_one) + { + wildstr+= scan; if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, (const uchar*)str_end)) <=0) return 1; str+= scan; - continue; - } - break; /* Not a wild character */ + continue; + } + break; /* Not a wild character */ } if (wildstr == wildend) - return 0; /* Ok if w_many is last */ + return 0; /* Ok if w_many is last */ if (str == str_end) - return -1; + return -1; if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <=0) @@ -1836,10 +1871,8 @@ int my_wildcmp_unicode(CHARSET_INFO *cs, return 1; if (weights) { - plane=(s_wc>>8) & 0xFF; - s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; - plane=(w_wc>>8) & 0xFF; - w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc; + my_tosort_unicode(weights, &s_wc); + my_tosort_unicode(weights, &w_wc); } if (s_wc == w_wc) @@ -1861,8 +1894,53 @@ int my_wildcmp_unicode(CHARSET_INFO *cs, return (str != str_end ? 
1 : 0); } -#endif +/* + This function is shared between utf8mb3/utf8mb4/ucs2/utf16/utf32 +*/ +size_t +my_strnxfrm_unicode(CHARSET_INFO *cs, + uchar *dst, size_t dstlen, + const uchar *src, size_t srclen) +{ + my_wc_t wc; + int res; + uchar *de= dst + dstlen; + uchar *de_beg= de - 1; + const uchar *se = src + srclen; + MY_UNICASE_INFO **uni_plane= (cs->state & MY_CS_BINSORT) ? + NULL : cs->caseinfo; + LINT_INIT(wc); + DBUG_ASSERT(src); + + while (dst < de_beg) + { + if ((res= cs->cset->mb_wc(cs,&wc, src, se)) <= 0) + break; + src+=res; + + if (uni_plane) + my_tosort_unicode(uni_plane, &wc); + + *dst++= (uchar) (wc >> 8); + if (dst < de) + *dst++= (uchar) (wc & 0xFF); + } + + while (dst < de_beg) /* Fill the tail with keys for space character */ + { + *dst++= 0x00; + *dst++= 0x20; + } + + if (dst < de) /* Clear the last byte, if "dstlen" was an odd number */ + *dst= 0x00; + + return dstlen; +} + + +#endif /* HAVE_UNIDATA */ #ifdef HAVE_CHARSET_utf8 @@ -2569,44 +2647,6 @@ size_t my_strnxfrmlen_utf8(CHARSET_INFO *cs __attribute__((unused)), } -static size_t my_strnxfrm_utf8(CHARSET_INFO *cs, - uchar *dst, size_t dstlen, - const uchar *src, size_t srclen) -{ - my_wc_t wc; - int res; - int plane; - uchar *de= dst + dstlen; - uchar *de_beg= de - 1; - const uchar *se = src + srclen; - MY_UNICASE_INFO **uni_plane= cs->caseinfo; - - while (dst < de_beg) - { - if ((res=my_utf8_uni(cs,&wc, src, se)) <= 0) - break; - src+=res; - - plane=(wc>>8) & 0xFF; - wc = uni_plane[plane] ? 
uni_plane[plane][wc & 0xFF].sort : wc; - - *dst++= (uchar)(wc >> 8); - *dst++= (uchar)(wc & 0xFF); - - } - - while (dst < de_beg) /* Fill the tail with keys for space character */ - { - *dst++= 0x00; - *dst++= 0x20; - } - - if (dst < de) /* Clear the last byte, if "dstlen" was an odd number */ - *dst= 0x00; - - return dstlen; -} - static uint my_ismbchar_utf8(CHARSET_INFO *cs,const char *b, const char *e) { my_wc_t wc; @@ -2642,7 +2682,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = NULL, /* init */ my_strnncoll_utf8, my_strnncollsp_utf8, - my_strnxfrm_utf8, + my_strnxfrm_unicode, my_strnxfrmlen_utf8, my_like_range_mb, my_wildcmp_utf8, @@ -2891,7 +2931,7 @@ static MY_COLLATION_HANDLER my_collation_cs_handler = NULL, /* init */ my_strnncoll_utf8_cs, my_strnncollsp_utf8_cs, - my_strnxfrm_utf8, + my_strnxfrm_unicode, my_strnxfrmlen_utf8, my_like_range_simple, my_wildcmp_mb, @@ -4154,7 +4194,7 @@ static MY_COLLATION_HANDLER my_collation_filename_handler = NULL, /* init */ my_strnncoll_utf8, my_strnncollsp_utf8, - my_strnxfrm_utf8, + my_strnxfrm_unicode, my_strnxfrmlen_utf8, my_like_range_mb, my_wildcmp_utf8, @@ -4284,3 +4324,859 @@ int main() +#ifdef HAVE_CHARSET_utf8mb4 + +/* + We treat every byte with a code above 127 as a letter. + This guarantees that word boundaries work fine with regular + expressions. Note that there is no need to mark byte 255 as a + letter: it is an illegal byte in UTF-8. 
+*/ +static uchar ctype_utf8mb4[]= +{ + 0, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16, + 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, + 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0 +}; + + +static uchar to_lower_utf8mb4[]= +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95, + 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 +}; + + +static uchar to_upper_utf8mb4[]= +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 +}; + + +static inline int +bincmp_utf8mb4(const uchar *s, const uchar *se, + const uchar *t, const uchar *te) +{ + int slen= (int) (se - s), tlen= (int) (te - t); + int len= min(slen, tlen); + int cmp= memcmp(s, t, len); + return cmp ? 
cmp : slen - tlen; +} + + +static int +my_mb_wc_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t * pwc, const uchar *s, const uchar *e) +{ + uchar c; + + if (s >= e) + return MY_CS_TOOSMALL; + + c= s[0]; + if (c < 0x80) + { + *pwc= c; + return 1; + } + else if (c < 0xc2) + return MY_CS_ILSEQ; + else if (c < 0xe0) + { + if (s + 2 > e) /* We need 2 characters */ + return MY_CS_TOOSMALL2; + + if (!((s[1] ^ 0x80) < 0x40)) + return MY_CS_ILSEQ; + + *pwc= ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80); + return 2; + } + else if (c < 0xf0) + { + if (s + 3 > e) /* We need 3 characters */ + return MY_CS_TOOSMALL3; + + if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && + (c >= 0xe1 || s[1] >= 0xa0))) + return MY_CS_ILSEQ; + + *pwc= ((my_wc_t) (c & 0x0f) << 12) | + ((my_wc_t) (s[1] ^ 0x80) << 6) | + (my_wc_t) (s[2] ^ 0x80); + return 3; + } + else if (c < 0xf5) + { + if (s + 4 > e) /* We need 4 characters */ + return MY_CS_TOOSMALL4; + + /* + UTF-8 quick four-byte mask: + 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + Encoding allows to encode U+00010000..U+001FFFFF + + The maximum character defined in the Unicode standard is U+0010FFFF. + Higher characters U+00110000..U+001FFFFF are not used. 
+ + 11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min) + 11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max) + + Valid codes: + [F0][90..BF][80..BF][80..BF] + [F1][80..BF][80..BF][80..BF] + [F2][80..BF][80..BF][80..BF] + [F3][80..BF][80..BF][80..BF] + [F4][80..8F][80..BF][80..BF] + */ + + if (!((s[1] ^ 0x80) < 0x40 && + (s[2] ^ 0x80) < 0x40 && + (s[3] ^ 0x80) < 0x40 && + (c >= 0xf1 || s[1] >= 0x90) && + (c <= 0xf3 || s[1] <= 0x8F))) + return MY_CS_ILSEQ; + *pwc = ((my_wc_t) (c & 0x07) << 18) | + ((my_wc_t) (s[1] ^ 0x80) << 12) | + ((my_wc_t) (s[2] ^ 0x80) << 6) | + (my_wc_t) (s[3] ^ 0x80); + return 4; + } + return MY_CS_ILSEQ; +} + + +/* + The same as above, but without range check + for example, for a null-terminated string +*/ +static int +my_mb_wc_utf8mb4_no_range(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t *pwc, const uchar *s) +{ + uchar c; + + c= s[0]; + if (c < 0x80) + { + *pwc = c; + return 1; + } + + if (c < 0xc2) + return MY_CS_ILSEQ; + + if (c < 0xe0) + { + if (!((s[1] ^ 0x80) < 0x40)) + return MY_CS_ILSEQ; + + *pwc = ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80); + return 2; + } + + if (c < 0xf0) + { + if (!((s[1] ^ 0x80) < 0x40 && + (s[2] ^ 0x80) < 0x40 && + (c >= 0xe1 || s[1] >= 0xa0))) + return MY_CS_ILSEQ; + *pwc= ((my_wc_t) (c & 0x0f) << 12) | + ((my_wc_t) (s[1] ^ 0x80) << 6) | + (my_wc_t) (s[2] ^ 0x80); + + return 3; + } + else if (c < 0xf5) + { + if (!((s[1] ^ 0x80) < 0x40 && + (s[2] ^ 0x80) < 0x40 && + (s[3] ^ 0x80) < 0x40 && + (c >= 0xf1 || s[1] >= 0x90) && + (c <= 0xf3 || s[1] <= 0x8F))) + return MY_CS_ILSEQ; + *pwc = ((my_wc_t) (c & 0x07) << 18) | + ((my_wc_t) (s[1] ^ 0x80) << 12) | + ((my_wc_t) (s[2] ^ 0x80) << 6) | + (my_wc_t) (s[3] ^ 0x80); + return 4; + } + return MY_CS_ILSEQ; +} + + +static int +my_wc_mb_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *r, uchar *e) +{ + int count; + + if (r >= e) + return MY_CS_TOOSMALL; + + if (wc < 0x80) + count= 1; + else if 
(wc < 0x800) + count= 2; + else if (wc < 0x10000) + count= 3; + else if (wc < 0x200000) + count= 4; + else return MY_CS_ILUNI; + + if (r + count > e) + return MY_CS_TOOSMALLN(count); + + switch (count) { + /* Fall through all cases!!! */ + case 4: r[3] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x10000; + case 3: r[2] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x800; + case 2: r[1] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0xc0; + case 1: r[0] = (uchar) wc; + } + return count; +} + + +/* + The same as above, but without range check. +*/ +static int +my_wc_mb_utf8mb4_no_range(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *r) +{ + int count; + + if (wc < 0x80) + count= 1; + else if (wc < 0x800) + count= 2; + else if (wc < 0x10000) + count= 3; + else if (wc < 0x200000) + count= 4; + else + return MY_CS_ILUNI; + + switch (count) + { + /* Fall through all cases!!! */ + case 4: r[3]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0x10000; + case 3: r[2]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0x800; + case 2: r[1]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0xc0; + case 1: r[0]= (uchar) wc; + } + return count; +} + + +static inline void +my_tolower_utf8mb4(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256 && uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].tolower; +} + + +static inline void +my_toupper_utf8mb4(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) +{ + int page= *wc >> 8; + if (page < 256 && uni_plane[page]) + *wc= uni_plane[page][*wc & 0xFF].toupper; +} + + +static size_t +my_caseup_utf8mb4(CHARSET_INFO *cs, char *src, size_t srclen, + char *dst, size_t dstlen) +{ + my_wc_t wc; + int srcres, dstres; + char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src != dst || cs->caseup_multiply == 1); + + while ((src < srcend) && + (srcres= my_mb_wc_utf8mb4(cs, &wc, + (uchar *) src, (uchar*) srcend)) > 0) + 
{ + my_toupper_utf8mb4(uni_plane, &wc); + if ((dstres= my_wc_mb_utf8mb4(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) + break; + src+= srcres; + dst+= dstres; + } + return (size_t) (dst - dst0); +} + + +static inline void +my_hash_add(ulong *n1, ulong *n2, uint ch) +{ + n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8); + n2[0]+= 3; +} + + +static void +my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *n1, ulong *n2) +{ + my_wc_t wc; + int res; + const uchar *e= s + slen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + + /* + Remove end space. We do this to be able to compare + 'A ' and 'A' as identical + */ + while (e > s && e[-1] == ' ') + e--; + + while ((res= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) e)) > 0) + { + my_tosort_unicode(uni_plane, &wc); + my_hash_add(n1, n2, (uint) (wc & 0xFF)); + my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF); + if (wc > 0xFFFF) + { + /* + Put the highest byte only if it is non-zero, + to make hash functions for utf8mb3 and utf8mb4 + compatible for BMP characters. + This is useful to keep order of records in + test results, e.g. for "SHOW GRANTS". 
+ */ + my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF); + } + s+= res; + } +} + + +static size_t +my_caseup_str_utf8mb4(CHARSET_INFO *cs, char *src) +{ + my_wc_t wc; + int srcres, dstres; + char *dst= src, *dst0= src; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(cs->caseup_multiply == 1); + + while (*src && + (srcres= my_mb_wc_utf8mb4_no_range(cs, &wc, (uchar *) src)) > 0) + { + my_toupper_utf8mb4(uni_plane, &wc); + if ((dstres= my_wc_mb_utf8mb4_no_range(cs, wc, (uchar*) dst)) <= 0) + break; + src+= srcres; + dst+= dstres; + } + *dst= '\0'; + return (size_t) (dst - dst0); +} + + +static size_t +my_casedn_utf8mb4(CHARSET_INFO *cs, + char *src, size_t srclen, + char *dst, size_t dstlen) +{ + my_wc_t wc; + int srcres, dstres; + char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(src != dst || cs->casedn_multiply == 1); + + while ((src < srcend) && + (srcres= my_mb_wc_utf8mb4(cs, &wc, + (uchar*) src, (uchar*) srcend)) > 0) + { + my_tolower_utf8mb4(uni_plane, &wc); + if ((dstres= my_wc_mb_utf8mb4(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) + break; + src+= srcres; + dst+= dstres; + } + return (size_t) (dst - dst0); +} + + +static size_t +my_casedn_str_utf8mb4(CHARSET_INFO *cs, char *src) +{ + my_wc_t wc; + int srcres, dstres; + char *dst= src, *dst0= src; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + DBUG_ASSERT(cs->casedn_multiply == 1); + + while (*src && + (srcres= my_mb_wc_utf8mb4_no_range(cs, &wc, (uchar *) src)) > 0) + { + my_tolower_utf8mb4(uni_plane, &wc); + if ((dstres= my_wc_mb_utf8mb4_no_range(cs, wc, (uchar*) dst)) <= 0) + break; + src+= srcres; + dst+= dstres; + } + + /* + In rare cases lower string can be shorter than + the original string, for example: + + "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE" + (which is 0xC4B0 in utf8, i.e. two bytes) + + is converted into + + "U+0069 LATIN SMALL LETTER I" + (which is 0x69 in utf8, i.e. 
one byte) + + So, we need to put '\0' terminator after converting. + */ + + *dst= '\0'; + return (size_t) (dst - dst0); +} + + +/* + Compare two utf8mb4 strings by the sort weights of their characters. + + Both strings are decoded character by character and each code point is + passed through my_tosort_unicode() before comparison. If either string + fails to decode, the remaining bytes are compared with bincmp_utf8mb4(). + When no difference is found, the result is (t - te) if t_is_prefix is + set, otherwise the difference of the remaining byte lengths. +*/ +static int +my_strnncoll_utf8mb4(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool t_is_prefix) +{ + my_wc_t s_wc,t_wc; + const uchar *se= s + slen; + const uchar *te= t + tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + + while ( s < se && t < te ) + { + int s_res= my_mb_wc_utf8mb4(cs, &s_wc, s, se); + int t_res= my_mb_wc_utf8mb4(cs, &t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare bytewise */ + return bincmp_utf8mb4(s, se, t, te); + } + + my_tosort_unicode(uni_plane, &s_wc); + my_tosort_unicode(uni_plane, &t_wc); + + if ( s_wc != t_wc ) + { + return s_wc > t_wc ? 1 : -1; + } + + s+= s_res; + t+= t_res; + } + return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t))); +} + + +/** + + Compare strings, discarding end space + + If one string is shorter than the other, then we space extend the + shorter one so that the strings have equal length. + + This will ensure that the following things hold: + + "a" == "a " + "a\0" < "a" + "a\0" < "a " + + @param cs Character set pointer. + @param s First string to compare. + @param slen Length of 's'. + @param t Second string to compare. + @param tlen Length of 't'. + @param diff_if_only_endspace_difference + Set to 1 if the strings should be regarded as different + if they differ only in end space. The flag is forced to + FALSE unless VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE + is defined. + + @return Comparison result. + @retval Negative number, if s is less than t. 
+ @retval 0, if s is equal to t + @retval Positive number, if s > t +*/ + +static int +my_strnncollsp_utf8mb4(CHARSET_INFO *cs, + const uchar *s, size_t slen, + const uchar *t, size_t tlen, + my_bool diff_if_only_endspace_difference) +{ + int res; + my_wc_t s_wc, t_wc; + const uchar *se= s + slen, *te= t + tlen; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + LINT_INIT(s_wc); + LINT_INIT(t_wc); + +#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE + diff_if_only_endspace_difference= FALSE; +#endif + + while ( s < se && t < te ) + { + int s_res= my_mb_wc_utf8mb4(cs, &s_wc, s, se); + int t_res= my_mb_wc_utf8mb4(cs, &t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare bytewise */ + return bincmp_utf8mb4(s, se, t, te); + } + + my_tosort_unicode(uni_plane, &s_wc); + my_tosort_unicode(uni_plane, &t_wc); + + if ( s_wc != t_wc ) + { + return s_wc > t_wc ? 1 : -1; + } + + s+=s_res; + t+=t_res; + } + + slen= (size_t) (se-s); + tlen= (size_t) (te-t); + res= 0; + + if (slen != tlen) + { + int swap= 1; + if (diff_if_only_endspace_difference) + res= 1; /* Assume 's' is bigger */ + if (slen < tlen) + { + slen= tlen; + s= t; + se= te; + swap= -1; + res= -res; + } + /* + The following loop uses the fact that in UTF-8 + all multibyte characters are greater than space, + and all multibyte head characters are greater than + space. It means if we meet a character greater + than space, it always means that the longer string + is greater. So we can reuse the same loop from the + 8bit version, without having to process full multibyte + sequences. + */ + for ( ; s < se; s++) + { + if (*s != ' ') + return (*s < ' ') ? -swap : swap; + } + } + return res; +} + + +/** + Compare 0-terminated UTF8 strings. + + @param cs character set handler + @param s First 0-terminated string to compare + @param t Second 0-terminated string to compare + + @return Comparison result. 
+ @retval negative number if s < t + @retval positive number if s > t + @retval 0 if the strings are equal + + @note If either string contains an invalid multi-byte sequence, the + comparison falls back to strcmp(), starting at the same character + position in both strings. +*/ + +static int +my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t) +{ + MY_UNICASE_INFO **uni_plane= cs->caseinfo; + while (s[0] && t[0]) + { + my_wc_t s_wc,t_wc; + /* Start of the current character of s, for the strcmp() fallback */ + const char *s_char= s; + + if ((uchar) s[0] < 128) + { + /* + s[0] is between 0 and 127. + It represents a single byte character. + Fold it to lower case through the plane00 table. + */ + s_wc= plane00[(uchar) s[0]].tolower; + s++; + } + else + { + int res= my_mb_wc_utf8mb4_no_range(cs, &s_wc, (const uchar*) s); + + /* + In the case of wrong multibyte sequence we will + call strcmp() for byte-to-byte comparison. + */ + if (res <= 0) + return strcmp(s, t); + s+= res; + + my_tolower_utf8mb4(uni_plane, &s_wc); + } + + + /* Do the same for the second string */ + + if ((uchar) t[0] < 128) + { + /* Fold the single byte character to lower case */ + t_wc= plane00[(uchar) t[0]].tolower; + t++; + } + else + { + int res= my_mb_wc_utf8mb4_no_range(cs, &t_wc, (const uchar*) t); + if (res <= 0) + { + /* + s has already been advanced past its character here, so + compare from the saved start of that character. Otherwise + the two operands would be misaligned by one character. + */ + return strcmp(s_char, t); + } + t+= res; + + my_tolower_utf8mb4(uni_plane, &t_wc); + } + + /* Now we have two weights, let's compare them */ + if ( s_wc != t_wc ) + return ((int) s_wc) - ((int) t_wc); + } + return ((int) (uchar) s[0]) - ((int) (uchar) t[0]); +} + + +static int +my_wildcmp_utf8mb4(CHARSET_INFO *cs, + const char *str, const char *strend, + const char *wildstr, const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_unicode(cs, str, strend, wildstr, wildend, + escape, w_one, w_many, cs->caseinfo); +} + + +static size_t +my_strnxfrmlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), size_t len) +{ + /* TODO: fix when working on WL "Unicode new version" */ + return (len * 2 + 2) / 4; +} + + +static uint +my_ismbchar_utf8mb4(CHARSET_INFO *cs, const char *b, const char *e) +{ + my_wc_t wc; + int res= my_mb_wc_utf8mb4(cs,&wc, (const uchar*)b, (const uchar*)e); + return (res > 1) ? 
res : 0; +} + + +static uint +my_mbcharlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), uint c) +{ + if (c < 0x80) + return 1; + if (c < 0xc2) + return 0; /* Illegal mb head */ + if (c < 0xe0) + return 2; + if (c < 0xf0) + return 3; + if (c < 0xf8) + return 4; + return 0; /* Illegal mb head */; +} + + +static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler= +{ + NULL, /* init */ + my_strnncoll_utf8mb4, + my_strnncollsp_utf8mb4, + my_strnxfrm_unicode, + my_strnxfrmlen_utf8mb4, + my_like_range_mb, + my_wildcmp_utf8mb4, + my_strcasecmp_utf8mb4, + my_instr_mb, + my_hash_sort_utf8mb4, + my_propagate_complex +}; + + +static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler = +{ + NULL, /* init */ + my_strnncoll_mb_bin, + my_strnncollsp_mb_bin, + my_strnxfrm_unicode, + my_strnxfrmlen_utf8mb4, + my_like_range_mb, + my_wildcmp_mb_bin, + my_strcasecmp_mb_bin, + my_instr_mb, + my_hash_sort_mb_bin, + my_propagate_simple +}; + + +MY_CHARSET_HANDLER my_charset_utf8mb4_handler= +{ + NULL, /* init */ + my_ismbchar_utf8mb4, + my_mbcharlen_utf8mb4, + my_numchars_mb, + my_charpos_mb, + my_well_formed_len_mb, + my_lengthsp_8bit, + my_numcells_mb, + my_mb_wc_utf8mb4, + my_wc_mb_utf8mb4, + my_mb_ctype_mb, + my_caseup_str_utf8mb4, + my_casedn_str_utf8mb4, + my_caseup_utf8mb4, + my_casedn_utf8mb4, + my_snprintf_8bit, + my_long10_to_str_8bit, + my_longlong10_to_str_8bit, + my_fill_8bit, + my_strntol_8bit, + my_strntoul_8bit, + my_strntoll_8bit, + my_strntoull_8bit, + my_strntod_8bit, + my_strtoll10_8bit, + my_strntoull10rnd_8bit, + my_scan_8bit +}; + + + +CHARSET_INFO my_charset_utf8mb4_general_ci= +{ + 45,0,0, /* number */ + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_UNICODE_SUPPLEMENT, /* state */ + MY_UTF8MB4, /* cs name */ + MY_UTF8MB4_GENERAL_CI,/* name */ + "UTF-8 Unicode", /* comment */ + NULL, /* tailoring */ + ctype_utf8mb4, /* ctype */ + to_lower_utf8mb4, /* to_lower */ + to_upper_utf8mb4, /* to_upper */ + to_upper_utf8mb4, /* sort_order 
*/ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_utf8mb4_general_ci_handler +}; + + +CHARSET_INFO my_charset_utf8mb4_bin= +{ + 46,0,0, /* number */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_UNICODE_SUPPLEMENT, /* state */ + MY_UTF8MB4, /* cs name */ + MY_UTF8MB4_BIN, /* name */ + "UTF-8 Unicode", /* comment */ + NULL, /* tailoring */ + ctype_utf8mb4, /* ctype */ + to_lower_utf8mb4, /* to_lower */ + to_upper_utf8mb4, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8mb4_handler, + &my_collation_utf8mb4_bin_handler +}; + +#endif /* HAVE_CHARSET_utf8mb4 */ |