diff options
author | Alexander Barkov <bar@mnogosearch.org> | 2013-10-23 20:25:52 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mnogosearch.org> | 2013-10-23 20:25:52 +0400 |
commit | 426d246f5b8a4c598a913838b625e05e63ccb41f (patch) | |
tree | 16792dd3268c0ec0fb02837c761a535af1938b1f /strings | |
parent | de8e306b6921ab32aedde957594e570166a7fce1 (diff) | |
download | mariadb-git-426d246f5b8a4c598a913838b625e05e63ccb41f.tar.gz |
MDEV-5163 Merge WEIGHT_STRING function from MySQL-5.6
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-big5.c | 48 | ||||
-rw-r--r-- | strings/ctype-bin.c | 34 | ||||
-rw-r--r-- | strings/ctype-cp932.c | 28 | ||||
-rw-r--r-- | strings/ctype-czech.c | 40 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 4 | ||||
-rw-r--r-- | strings/ctype-eucjpms.c | 4 | ||||
-rw-r--r-- | strings/ctype-extra.c | 62 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 4 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 46 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 32 | ||||
-rw-r--r-- | strings/ctype-mb.c | 109 | ||||
-rw-r--r-- | strings/ctype-simple.c | 164 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 27 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 32 | ||||
-rw-r--r-- | strings/ctype-uca.c | 174 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 9 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 4 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 205 | ||||
-rw-r--r-- | strings/ctype-win1250ch.c | 33 |
19 files changed, 803 insertions, 256 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index cf9fc339280..38bdf86c64a 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -925,31 +925,35 @@ static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)), } -static size_t my_strnxfrm_big5(CHARSET_INFO *cs __attribute__((unused)), - uchar *dest, size_t len, - const uchar *src, size_t srclen) +static size_t +my_strnxfrm_big5(CHARSET_INFO *cs, + uchar *dst, size_t dstlen, uint nweights, + const uchar *src, size_t srclen, uint flags) { - uint16 e; - size_t dstlen= len; - uchar *dest_end= dest + dstlen; - - len = srclen; - while (len-- && dest < dest_end) + uchar *d0= dst; + uchar *de= dst + dstlen; + const uchar *se= src + srclen; + const uchar *sort_order= cs->sort_order; + + for (; dst < de && src < se && nweights; nweights--) { - if ((len > 0) && isbig5code(*src, *(src+1))) + if (cs->cset->ismbchar(cs, (const char*) src, (const char*) se)) { - e = big5strokexfrm((uint16) big5code(*src, *(src+1))); - *dest++ = big5head(e); - if (dest < dest_end) - *dest++ = big5tail(e); - src +=2; - len--; - } else - *dest++ = sort_order_big5[(uchar) *src++]; + /* + Note, it is safe not to check (src < se) + in the code below, because ismbchar() would + not return TRUE if src was too short + */ + uint16 e= big5strokexfrm((uint16) big5code(*src, *(src + 1))); + *dst++= big5head(e); + if (dst < de) + *dst++= big5tail(e); + src+= 2; + } + else + *dst++= sort_order ? sort_order[*src++] : *src++; } - if (dstlen > srclen) - bfill(dest, dstlen - srclen, ' '); - return dstlen; + return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0); } #if 0 @@ -6948,6 +6952,7 @@ struct charset_info_st my_charset_big5_chinese_ci= 0xF9D5, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_big5_handler, &my_collation_big5_chinese_ci_handler }; @@ -6980,6 +6985,7 @@ struct charset_info_st my_charset_big5_bin= 0xF9FE, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_big5_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 76e8da25fc2..52dae7912af 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -409,28 +409,17 @@ int my_wildcmp_bin(CHARSET_INFO *cs, } -static size_t my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)), - uchar *dest, size_t dstlen, - const uchar *src, size_t srclen) +static size_t +my_strnxfrm_8bit_bin(CHARSET_INFO *cs, + uchar * dst, size_t dstlen, uint nweights, + const uchar *src, size_t srclen, uint flags) { - if (dest != src) - memcpy(dest, src, MY_MIN(dstlen,srclen)); - if (dstlen > srclen) - bfill(dest + srclen, dstlen - srclen, 0); - return dstlen; -} - - -static -size_t my_strnxfrm_8bit_bin(CHARSET_INFO *cs __attribute__((unused)), - uchar *dest, size_t dstlen, - const uchar *src, size_t srclen) -{ - if (dest != src) - memcpy(dest, src, MY_MIN(dstlen,srclen)); - if (dstlen > srclen) - bfill(dest + srclen, dstlen - srclen, ' '); - return dstlen; + set_if_smaller(srclen, dstlen); + set_if_smaller(srclen, nweights); + if (dst != src) + memcpy(dst, src, srclen); + return my_strxfrm_pad_desc_and_reverse(cs, dst, dst + srclen, dst + dstlen, + nweights - srclen, flags, 0); } @@ -516,7 +505,7 @@ static MY_COLLATION_HANDLER my_collation_binary_handler = NULL, /* init */ my_strnncoll_binary, my_strnncollsp_binary, - my_strnxfrm_bin, + my_strnxfrm_8bit_bin, my_strnxfrmlen_simple, my_like_range_simple, my_wildcmp_bin, @@ -586,6 +575,7 @@ struct charset_info_st my_charset_bin = 255, /* max_sort_char */ 0, /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_binary_handler }; diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c index 946cf4253d8..b3d08c5a54c 100644 --- a/strings/ctype-cp932.c +++ b/strings/ctype-cp932.c @@ -1794,30 +1794,6 @@ static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)), } - -static size_t my_strnxfrm_cp932(CHARSET_INFO *cs __attribute__((unused)), - uchar *dest, size_t len, - const uchar *src, size_t srclen) -{ - uchar *d_end = dest + len; - uchar *s_end = (uchar*) src + srclen; - while (dest < d_end && src < s_end) - { - if (ismbchar_cp932(cs,(char*) src, (char*) s_end)) - { - *dest++ = *src++; - if (dest < d_end && src < s_end) - *dest++ = *src++; - } - else - *dest++ = sort_order_cp932[(uchar)*src++]; - } - if (len > srclen) - bfill(dest, len - srclen, ' '); - return len; -} - - static const uint16 cp932_to_unicode[65536]= { 0x0000, 0x0001, 0x0002, 0x0003, /* 0000 */ @@ -34785,7 +34761,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = NULL, /* init */ my_strnncoll_cp932, my_strnncollsp_cp932, - my_strnxfrm_cp932, + my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, my_wildcmp_mb, /* wildcmp */ @@ -34855,6 +34831,7 @@ struct charset_info_st my_charset_cp932_japanese_ci= 0xFCFC, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_ci_handler }; @@ -34886,6 +34863,7 @@ struct charset_info_st my_charset_cp932_bin= 0xFCFC, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index 1418edfecb3..4698521bd16 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -166,7 +166,7 @@ static const struct wordvalue doubles[] = { */ #define ADD_TO_RESULT(dest, len, totlen, value) \ -if ((totlen) < (len)) { dest[totlen] = value; } (totlen++); +{ if ((totlen) < (len)) { dest[totlen++]= value; } } #define IS_END(p, src, len) (((char *)p - (char *)src) >= (len)) #define NEXT_CMP_VALUE(src, p, store, pass, value, len) \ @@ -287,13 +287,26 @@ int my_strnncollsp_czech(CHARSET_INFO * cs, /* + Returns the number of bytes required for strnxfrm(). +*/ +static size_t +my_strnxfrmlen_czech(CHARSET_INFO *cs + __attribute__((unused)), size_t len) +{ + return len * 4 + 4; +} + + +/* Function strnxfrm, actually strxfrm, with Czech sorting, which expect the length of the strings being specified */ -static size_t my_strnxfrm_czech(CHARSET_INFO *cs __attribute__((unused)), - uchar *dest, size_t len, - const uchar *src, size_t srclen) +static size_t +my_strnxfrm_czech(CHARSET_INFO *cs __attribute__((unused)), + uchar *dest, size_t len, + uint nweights_arg __attribute__((unused)), + const uchar *src, size_t srclen, uint flags) { int value; const uchar *p, * store; @@ -301,15 +314,23 @@ static size_t my_strnxfrm_czech(CHARSET_INFO *cs __attribute__((unused)), size_t totlen = 0; p = src; store = src; + if (!(flags & 0x0F)) /* All levels by default */ + flags|= 0x0F; + do { + int add= (1 << pass) & flags; /* If this level is needed */ NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen); - ADD_TO_RESULT(dest, len, totlen, value); + if (add) + ADD_TO_RESULT(dest, len, totlen, value); } while (value); - if (len > totlen) - bfill(dest + totlen, len - totlen, ' '); - return len; + if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && len > totlen) + { + memset(dest + totlen, ' ', len - totlen); + totlen= len; + } + return totlen; } #undef IS_END @@ -592,7 +613,7 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler = my_strnncoll_czech, my_strnncollsp_czech, my_strnxfrm_czech, - my_strnxfrmlen_simple, + my_strnxfrmlen_czech, my_like_range_czech, my_wildcmp_bin, my_strcasecmp_8bit, @@ -628,6 +649,7 @@ struct charset_info_st my_charset_latin2_czech_ci = 0, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 4, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_latin2_czech_ci_handler }; diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index 66b8b090241..b7065369258 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -9969,7 +9969,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = NULL, /* init */ my_strnncoll_simple, /* strnncoll */ my_strnncollsp_simple, - my_strnxfrm_simple, /* strnxfrm */ + my_strnxfrm_mb, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ my_wildcmp_mb, /* wildcmp */ @@ -10038,6 +10038,7 @@ struct charset_info_st my_charset_euckr_korean_ci= 0xFEFE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_ci_handler }; @@ -10070,6 +10071,7 @@ struct charset_info_st my_charset_euckr_bin= 0xFEFE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c index 59a9a43c0f5..d9033a234c4 100644 --- a/strings/ctype-eucjpms.c +++ b/strings/ctype-eucjpms.c @@ -67511,7 +67511,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = NULL, /* init */ my_strnncoll_simple,/* strnncoll */ my_strnncollsp_simple, - my_strnxfrm_simple, /* strnxfrm */ + my_strnxfrm_mb, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ my_wildcmp_mb, /* wildcmp */ @@ -67581,6 +67581,7 @@ struct charset_info_st my_charset_eucjpms_japanese_ci= 0xFEFE, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_ci_handler }; @@ -67613,6 +67614,7 @@ struct charset_info_st my_charset_eucjpms_bin= 0xFEFE, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c index addeeba8ba0..c690b6d0c18 100644 --- a/strings/ctype-extra.c +++ b/strings/ctype-extra.c @@ -6631,6 +6631,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -6663,6 +6664,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -6695,6 +6697,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -6727,6 +6730,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -6759,6 +6763,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -6791,6 +6796,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -6823,6 +6829,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -6855,6 +6862,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -6887,6 +6895,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -6919,6 +6928,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -6951,6 +6961,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -6983,6 +6994,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7015,6 +7027,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7047,6 +7060,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7079,6 +7093,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7111,6 +7126,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7143,6 +7159,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7175,6 +7192,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7207,6 +7225,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7239,6 +7258,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7271,6 +7291,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7303,6 +7324,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7335,6 +7357,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7367,6 +7390,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7399,6 +7423,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7431,6 +7456,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7463,6 +7489,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7495,6 +7522,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7527,6 +7555,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -7559,6 +7588,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7591,6 +7621,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7623,6 +7654,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7655,6 +7687,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -7687,6 +7720,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7719,6 +7753,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7751,6 +7786,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -7783,6 +7819,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7815,6 +7852,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -7847,6 +7885,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -7879,6 +7918,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -7911,6 +7951,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -7943,6 +7984,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -7975,6 +8017,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8007,6 +8050,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8039,6 +8083,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8071,6 +8116,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8103,6 +8149,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8135,6 +8182,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8167,6 +8215,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8199,6 +8248,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8231,6 +8281,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8263,6 +8314,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8295,6 +8347,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8327,6 +8380,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8359,6 +8413,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8391,6 +8446,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8423,6 +8479,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8455,6 +8512,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -8487,6 +8545,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_bin_handler, } @@ -8519,6 +8578,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -8551,6 +8611,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } @@ -8582,6 +8643,7 @@ struct charset_info_st compiled_charsets[] = { 255, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_8bit_simple_ci_handler, } diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index 74be52a5c6d..0399660d311 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -6372,7 +6372,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = NULL, /* init */ my_strnncoll_simple, /* strnncoll */ my_strnncollsp_simple, - my_strnxfrm_simple, /* strnxfrm */ + my_strnxfrm_mb, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ my_wildcmp_mb, /* wildcmp */ @@ -6441,6 +6441,7 @@ struct charset_info_st my_charset_gb2312_chinese_ci= 0xF7FE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_ci_handler }; @@ -6472,6 +6473,7 @@ struct charset_info_st my_charset_gb2312_bin= 0xF7FE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index dd617fd8548..f1b46ca4e6c 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -3525,31 +3525,35 @@ static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)), } -static size_t my_strnxfrm_gbk(CHARSET_INFO *cs __attribute__((unused)), - uchar *dest, size_t len, - const uchar *src, size_t srclen) +static size_t +my_strnxfrm_gbk(CHARSET_INFO *cs, + uchar *dst, size_t dstlen, uint nweights, + const uchar *src, size_t srclen, uint flags) { - uint16 e; - size_t dstlen= len; - uchar *dest_end= dest + dstlen; + uchar *d0= dst; + uchar *de= dst + dstlen; + const uchar *se= src + srclen; + const uchar *sort_order= cs->sort_order; - len = srclen; - while (len-- && dest < dest_end) + for (; dst < de && src < se && nweights; nweights--) { - if ((len > 0) && isgbkcode(*src, *(src+1))) + if (cs->cset->ismbchar(cs, (const char*) src, (const char*) se)) { - e = gbksortorder((uint16) gbkcode(*src, *(src+1))); - *dest++ = gbkhead(e); - if (dest < dest_end) - *dest++ = gbktail(e); - src+=2; - len--; - } else - *dest++ = sort_order_gbk[(uchar) *src++]; + /* + Note, it is safe not to check (src < se) + in the code below, because ismbchar() would + not return TRUE if src was too short + */ + uint16 e= gbksortorder((uint16) gbkcode(*src, *(src + 1))); + *dst++= gbkhead(e); + if (dst < de) + *dst++= gbktail(e); + src+= 2; + } + else + *dst++= sort_order ? sort_order[*src++] : *src++; } - if (dstlen > srclen) - bfill(dest, dstlen - srclen, ' '); - return dstlen; + return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0); } @@ -10833,6 +10837,7 @@ struct charset_info_st my_charset_gbk_chinese_ci= 0xA967, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_ci_handler }; @@ -10864,6 +10869,7 @@ struct charset_info_st my_charset_gbk_bin= 0xFEFE, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 759997dae2d..2c84f86fad0 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -452,6 +452,7 @@ struct charset_info_st my_charset_latin1= 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_8bit_simple_ci_handler }; @@ -663,22 +664,25 @@ static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)), } -static size_t my_strnxfrm_latin1_de(CHARSET_INFO *cs __attribute__((unused)), - uchar *dest, size_t len, - const uchar *src, size_t srclen) +static size_t +my_strnxfrm_latin1_de(CHARSET_INFO *cs, + uchar *dst, size_t dstlen, uint nweights, + const uchar* src, size_t srclen, uint flags) { - const uchar *de = dest + len; - const uchar *se = src + srclen; - for ( ; src < se && dest < de ; src++) + uchar *de= dst + dstlen; + const uchar *se= src + srclen; + uchar *d0= dst; + for ( ; src < se && dst < de && nweights; src++, nweights--) { - uchar chr=combo1map[*src]; - *dest++=chr; - if ((chr=combo2map[*src]) && dest < de) - *dest++=chr; + uchar chr= combo1map[*src]; + *dst++= chr; + if ((chr= combo2map[*src]) && dst < de && nweights > 1) + { + *dst++= chr; + nweights--; + } } - if (dest < de) - bfill(dest, de - dest, ' '); - return (int) len; + return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0); } @@ -750,6 +754,7 @@ struct charset_info_st my_charset_latin1_german2_ci= 247, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_german2_ci_handler }; @@ -782,6 +787,7 @@ struct charset_info_st my_charset_latin1_bin= 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_8bit_bin_handler }; diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index c5c8fd92842..4c3256e2db8 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -565,15 +565,106 @@ my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), } -static size_t my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)), - uchar *dest, size_t dstlen, - const uchar *src, size_t srclen) +/* + Copy one non-ascii character. + "dst" must have enough room for the character. + Note, we don't use sort_order[] in this macros. + This is correct even for case insensitive collations: + - basic Latin letters are processed outside this macros; + - for other characters sort_order[x] is equal to x. +*/ +#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se) \ +{ \ + switch (cs->cset->ismbchar(cs, (const char*) src, (const char*) se)) { \ + case 4: \ + *dst++= *src++; \ + /* fall through */ \ + case 3: \ + *dst++= *src++; \ + /* fall through */ \ + case 2: \ + *dst++= *src++; \ + /* fall through */ \ + case 0: \ + *dst++= *src++; /* byte in range 0x80..0xFF which is not MB head */ \ + } \ +} + + +/* + For character sets with two or three byte multi-byte + characters having multibyte weights *equal* to their codes: + cp932, euckr, gb2312, sjis, eucjpms, ujis. +*/ +size_t +my_strnxfrm_mb(CHARSET_INFO *cs, + uchar *dst, size_t dstlen, uint nweights, + const uchar *src, size_t srclen, uint flags) { - if (dest != src) - memcpy(dest, src, MY_MIN(dstlen, srclen)); - if (dstlen > srclen) - bfill(dest + srclen, dstlen - srclen, ' '); - return dstlen; + uchar *d0= dst; + uchar *de= dst + dstlen; + const uchar *se= src + srclen; + const uchar *sort_order= cs->sort_order; + + DBUG_ASSERT(cs->mbmaxlen <= 4); + + /* + If "srclen" is smaller than both "dstlen" and "nweights" + then we can run a simplified loop - + without checking "nweights" and "de". + */ + if (dstlen >= srclen && nweights >= srclen) + { + if (sort_order) + { + /* Optimized version for a case insensitive collation */ + for (; src < se; nweights--) + { + if (*src < 128) /* quickly catch ASCII characters */ + *dst++= sort_order[*src++]; + else + my_strnxfrm_mb_non_ascii_char(cs, dst, src, se); + } + } + else + { + /* Optimized version for a case sensitive collation (no sort_order) */ + for (; src < se; nweights--) + { + if (*src < 128) /* quickly catch ASCII characters */ + *dst++= *src++; + else + my_strnxfrm_mb_non_ascii_char(cs, dst, src, se); + } + } + goto pad; + } + + /* + A thourough loop, checking all possible limits: + "se", "nweights" and "de". + */ + for (; src < se && nweights && dst < de; nweights--) + { + int chlen; + if (*src < 128 || + !(chlen= cs->cset->ismbchar(cs, (const char*) src, (const char*) se))) + { + /* Single byte character */ + *dst++= sort_order ? sort_order[*src++] : *src++; + } + else + { + /* Multi-byte character */ + int len= (dst + chlen <= de) ? chlen : de - dst; + memcpy(dst, src, len); + dst+= len; + src+= len; + } + } + +pad: + return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0); } @@ -1381,7 +1472,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler = NULL, /* init */ my_strnncoll_mb_bin, my_strnncollsp_mb_bin, - my_strnxfrm_mb_bin, + my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, my_wildcmp_mb_bin, diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 91a9df9d50b..7a06570d5b5 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -73,27 +73,28 @@ size_t my_strnxfrmlen_simple(CHARSET_INFO *cs, size_t len) size_t my_strnxfrm_simple(CHARSET_INFO * cs, - uchar *dest, size_t len, - const uchar *src, size_t srclen) + uchar *dst, size_t dstlen, uint nweights, + const uchar *src, size_t srclen, uint flags) { const uchar *map= cs->sort_order; - size_t dstlen= len; - set_if_smaller(len, srclen); - if (dest != src) + uchar *d0= dst; + uint frmlen; + if ((frmlen= MY_MIN(dstlen, nweights)) > srclen) + frmlen= srclen; + if (dst != src) { const uchar *end; - for ( end=src+len; src < end ; ) - *dest++= map[*src++]; + for (end= src + frmlen; src < end;) + *dst++= map[*src++]; } else { const uchar *end; - for ( end=dest+len; dest < end ; dest++) - *dest= (char) map[(uchar) *dest]; + for (end= dst + frmlen; dst < end; dst++) + *dst= map[(uchar) *dst]; } - if (dstlen > len) - bfill(dest, dstlen - len, ' '); - return dstlen; + return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, d0 + dstlen, + nweights - frmlen, flags, 0); } @@ -1684,6 +1685,145 @@ my_bool my_propagate_complex(CHARSET_INFO *cs __attribute__((unused)), } +/* + Normalize strxfrm flags + + SYNOPSIS: + my_strxfrm_flag_normalize() + flags - non-normalized flags + nlevels - number of levels + + NOTES: + If levels are omitted, then 1-maximum is assumed. + If any level number is greater than the maximum, + it is treated as the maximum. + + RETURN + normalized flags +*/ + +uint my_strxfrm_flag_normalize(uint flags, uint maximum) +{ + DBUG_ASSERT(maximum >= 1 && maximum <= MY_STRXFRM_NLEVELS); + + /* If levels are omitted, then 1-maximum is assumed*/ + if (!(flags & MY_STRXFRM_LEVEL_ALL)) + { + static uint def_level_flags[]= {0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F }; + uint flag_pad= flags & + (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN); + flags= def_level_flags[maximum] | flag_pad; + } + else + { + uint i; + uint flag_lev= flags & MY_STRXFRM_LEVEL_ALL; + uint flag_dsc= (flags >> MY_STRXFRM_DESC_SHIFT) & MY_STRXFRM_LEVEL_ALL; + uint flag_rev= (flags >> MY_STRXFRM_REVERSE_SHIFT) & MY_STRXFRM_LEVEL_ALL; + uint flag_pad= flags & + (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN); + + /* + If any level number is greater than the maximum, + it is treated as the maximum. + */ + for (maximum--, flags= 0, i= 0; i < MY_STRXFRM_NLEVELS; i++) + { + uint src_bit= 1 << i; + if (flag_lev & src_bit) + { + uint dst_bit= 1 << MY_MIN(i, maximum); + flags|= dst_bit; + flags|= (flag_dsc & dst_bit) << MY_STRXFRM_DESC_SHIFT; + flags|= (flag_rev & dst_bit) << MY_STRXFRM_REVERSE_SHIFT; + } + } + flags|= flag_pad; + } + + return flags; +} + + +/* + Apply DESC and REVERSE collation rules. + + SYNOPSIS: + my_strxfrm_desc_and_reverse() + str - pointer to string + strend - end of string + flags - flags + level - which level, starting from 0. + + NOTES: + Apply DESC or REVERSE or both flags. + + If DESC flag is given, then the weights + come out NOTed or negated for that level. + + If REVERSE flags is given, then the weights come out in + reverse order for that level, that is, starting with + the last character and ending with the first character. + + If nether DESC nor REVERSE flags are give, + the string is not changed. + +*/ +void +my_strxfrm_desc_and_reverse(uchar *str, uchar *strend, + uint flags, uint level) +{ + if (flags & (MY_STRXFRM_DESC_LEVEL1 << level)) + { + if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level)) + { + for (strend--; str <= strend;) + { + uchar tmp= *str; + *str++= ~*strend; + *strend--= ~tmp; + } + } + else + { + for (; str < strend; str++) + *str= ~*str; + } + } + else if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level)) + { + for (strend--; str < strend;) + { + uchar tmp= *str; + *str++= *strend; + *strend--= tmp; + } + } +} + + +size_t +my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs, + uchar *str, uchar *frmend, uchar *strend, + uint nweights, uint flags, uint level) +{ + if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE)) + { + uint fill_length= MY_MIN((uint) (strend - frmend), nweights * cs->mbminlen); + cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char); + frmend+= fill_length; + } + my_strxfrm_desc_and_reverse(str, frmend, flags, level); + if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && frmend < strend) + { + uint fill_length= strend - frmend; + cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char); + frmend= strend; + } + return frmend - str; +} + + MY_CHARSET_HANDLER my_charset_8bit_handler= { my_cset_init_8bit, diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 2c3d2b34dab..dce9e5ad37f 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -1163,29 +1163,6 @@ static int my_strnncollsp_sjis(CHARSET_INFO *cs __attribute__((unused)), -static size_t my_strnxfrm_sjis(CHARSET_INFO *cs __attribute__((unused)), - uchar *dest, size_t len, - const uchar *src, size_t srclen) -{ - uchar *d_end = dest + len; - uchar *s_end = (uchar*) src + srclen; - while (dest < d_end && src < s_end) - { - if (ismbchar_sjis(cs,(char*) src, (char*) s_end)) - { - *dest++ = *src++; - if (dest < d_end && src < s_end) - *dest++ = *src++; - } - else - *dest++ = sort_order_sjis[(uchar)*src++]; - } - if (len > srclen) - bfill(dest, len - srclen, ' '); - return len; -} - - /* SJIS->Unicode conversion table */ static uint16 sjis_to_unicode[65536]= { @@ -34156,7 +34133,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = NULL, /* init */ my_strnncoll_sjis, my_strnncollsp_sjis, - my_strnxfrm_sjis, + my_strnxfrm_mb, my_strnxfrmlen_simple, my_like_range_mb, my_wildcmp_mb, /* wildcmp */ @@ -34226,6 +34203,7 @@ struct charset_info_st my_charset_sjis_japanese_ci= 0xFCFC, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_ci_handler }; @@ -34257,6 +34235,7 @@ struct charset_info_st my_charset_sjis_bin= 0xFCFC, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index d0b4f9b8862..c2ed01a0603 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -617,18 +617,26 @@ ret: Ret: Conveted string size */ -static -size_t my_strnxfrm_tis620(CHARSET_INFO *cs __attribute__((unused)), - uchar *dest, size_t len, - const uchar *src, size_t srclen) +static size_t +my_strnxfrm_tis620(const CHARSET_INFO *cs, + uchar *dst, size_t dstlen, uint nweights, + const uchar *src, size_t srclen, uint flags) { - size_t dstlen= len; - len= (size_t) (strmake((char*) dest, (char*) src, MY_MIN(len, srclen)) - - (char*) dest); - len= thai2sortable(dest, len); - if (dstlen > len) - bfill(dest + len, dstlen - len, ' '); - return dstlen; + size_t len, dstlen0= dstlen; + len= (uint) (strmake((char*) dst, (char*) src, MY_MIN(dstlen, srclen)) - + (char*) dst); + len= thai2sortable(dst, len); + set_if_smaller(dstlen, nweights); + set_if_smaller(len, dstlen); + len= my_strxfrm_pad_desc_and_reverse(cs, dst, dst + len, dst + dstlen, + dstlen - len, flags, 0); + if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && len < dstlen0) + { + uint fill_length= dstlen0 - len; + cs->cset->fill(cs, (char*) dst + len, fill_length, cs->pad_char); + len= dstlen0; + } + return len; } @@ -909,6 +917,7 @@ struct charset_info_st my_charset_tis620_thai_ci= 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_ci_handler }; @@ -940,6 +949,7 @@ struct charset_info_st my_charset_tis620_bin= 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_8bit_bin_handler }; diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 5d52cb7e517..6eb68385832 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -7636,33 +7636,49 @@ static void my_hash_sort_uca(CHARSET_INFO *cs, Number of bytes that have been written into the binary image. */ -static size_t my_strnxfrm_uca(CHARSET_INFO *cs, - my_uca_scanner_handler *scanner_handler, - uchar *dst, size_t dstlen, - const uchar *src, size_t srclen) + +static size_t +my_strnxfrm_uca(CHARSET_INFO *cs, + my_uca_scanner_handler *scanner_handler, + uchar *dst, size_t dstlen, uint nweights, + const uchar *src, size_t srclen, uint flags) { - uchar *de= dst + (dstlen & (size_t) ~1); /* add even length for easier code */ + uchar *d0= dst; + uchar *de= dst + dstlen; int s_res; my_uca_scanner scanner; scanner_handler->init(&scanner, cs, &cs->uca->level[0], src, srclen); - while (dst < de && (s_res= scanner_handler->next(&scanner)) >0) + for (; dst < de && nweights && + (s_res= scanner_handler->next(&scanner)) > 0 ; nweights--) { - dst[0]= s_res >> 8; - dst[1]= s_res & 0xFF; - dst+= 2; + *dst++= s_res >> 8; + if (dst < de) + *dst++= s_res & 0xFF; } - s_res= my_space_weight(cs); - while (dst < de) + + if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE)) { - dst[0]= s_res >> 8; - dst[1]= s_res & 0xFF; - dst+= 2; + uint space_count= MY_MIN((uint) (de - dst) / 2, nweights); + s_res= my_space_weight(cs); + for (; space_count ; space_count--) + { + *dst++= s_res >> 8; + *dst++= s_res & 0xFF; + } } - if (dstlen & 1) /* if odd number then fill the last char */ - *dst= '\0'; - - return dstlen; + my_strxfrm_desc_and_reverse(d0, dst, flags, 0); + if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de) + { + s_res= my_space_weight(cs); + for ( ; dst < de; ) + { + *dst++= s_res >> 8; + if (dst < de) + *dst++= s_res & 0xFF; + } + } + return dst - d0; } @@ -9461,11 +9477,11 @@ static void my_hash_sort_any_uca(CHARSET_INFO *cs, } static size_t my_strnxfrm_any_uca(CHARSET_INFO *cs, - uchar *dst, size_t dstlen, - const uchar *src, size_t srclen) + uchar *dst, size_t dstlen, uint nweights, + const uchar *src, size_t srclen, uint flags) { return my_strnxfrm_uca(cs, &my_any_uca_scanner_handler, - dst, dstlen, src, srclen); + dst, dstlen, nweights, src, srclen, flags); } @@ -9500,11 +9516,11 @@ static void my_hash_sort_ucs2_uca(CHARSET_INFO *cs, } static size_t my_strnxfrm_ucs2_uca(CHARSET_INFO *cs, - uchar *dst, size_t dstlen, - const uchar *src, size_t srclen) + uchar *dst, size_t dstlen, uint nweights, + const uchar *src, size_t srclen, uint flags) { return my_strnxfrm_uca(cs, &my_any_uca_scanner_handler, - dst, dstlen, src, srclen); + dst, dstlen, nweights, src, srclen, flags); } MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = @@ -9549,6 +9565,7 @@ struct charset_info_st my_charset_ucs2_unicode_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9580,6 +9597,7 @@ struct charset_info_st my_charset_ucs2_icelandic_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9611,6 +9629,7 @@ struct charset_info_st my_charset_ucs2_latvian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9642,6 +9661,7 @@ struct charset_info_st my_charset_ucs2_romanian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9673,6 +9693,7 @@ struct charset_info_st my_charset_ucs2_slovenian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9704,6 +9725,7 @@ struct charset_info_st my_charset_ucs2_polish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9735,6 +9757,7 @@ struct charset_info_st my_charset_ucs2_estonian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9766,6 +9789,7 @@ struct charset_info_st my_charset_ucs2_spanish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9797,6 +9821,7 @@ struct charset_info_st my_charset_ucs2_swedish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9828,6 +9853,7 @@ struct charset_info_st my_charset_ucs2_turkish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9859,6 +9885,7 @@ struct charset_info_st my_charset_ucs2_czech_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9891,6 +9918,7 @@ struct charset_info_st my_charset_ucs2_danish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9922,6 +9950,7 @@ struct charset_info_st my_charset_ucs2_lithuanian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9953,6 +9982,7 @@ struct charset_info_st my_charset_ucs2_slovak_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -9984,6 +10014,7 @@ struct charset_info_st my_charset_ucs2_spanish2_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -10016,6 +10047,7 @@ struct charset_info_st my_charset_ucs2_roman_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -10048,6 +10080,7 @@ struct charset_info_st my_charset_ucs2_persian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -10080,6 +10113,7 @@ struct charset_info_st my_charset_ucs2_esperanto_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -10112,6 +10146,7 @@ struct charset_info_st my_charset_ucs2_hungarian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -10143,6 +10178,7 @@ struct charset_info_st my_charset_ucs2_sinhala_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -10176,6 +10212,7 @@ struct charset_info_st my_charset_ucs2_german2_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -10207,6 +10244,7 @@ struct charset_info_st my_charset_ucs2_croatian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_uca_handler }; @@ -10288,6 +10326,7 @@ struct charset_info_st my_charset_utf8_unicode_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10320,6 +10359,7 @@ struct charset_info_st my_charset_utf8_icelandic_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10351,6 +10391,7 @@ struct charset_info_st my_charset_utf8_latvian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10382,6 +10423,7 @@ struct charset_info_st my_charset_utf8_romanian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10413,6 +10455,7 @@ struct charset_info_st my_charset_utf8_slovenian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10444,6 +10487,7 @@ struct charset_info_st my_charset_utf8_polish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10475,6 +10519,7 @@ struct charset_info_st my_charset_utf8_estonian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10506,6 +10551,7 @@ struct charset_info_st my_charset_utf8_spanish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10537,6 +10583,7 @@ struct charset_info_st my_charset_utf8_swedish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10568,6 +10615,7 @@ struct charset_info_st my_charset_utf8_turkish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10599,6 +10647,7 @@ struct charset_info_st my_charset_utf8_czech_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10631,6 +10680,7 @@ struct charset_info_st my_charset_utf8_danish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10662,6 +10712,7 @@ struct charset_info_st my_charset_utf8_lithuanian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10693,6 +10744,7 @@ struct charset_info_st my_charset_utf8_slovak_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10724,6 +10776,7 @@ struct charset_info_st my_charset_utf8_spanish2_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10755,6 +10808,7 @@ struct charset_info_st my_charset_utf8_roman_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10786,6 +10840,7 @@ struct charset_info_st my_charset_utf8_persian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10817,6 +10872,7 @@ struct charset_info_st my_charset_utf8_esperanto_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10848,6 +10904,7 @@ struct charset_info_st my_charset_utf8_hungarian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10879,6 +10936,7 @@ struct charset_info_st my_charset_utf8_sinhala_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10911,6 +10969,7 @@ struct charset_info_st my_charset_utf8_german2_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10942,6 +11001,7 @@ struct charset_info_st my_charset_utf8_croatian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_any_uca_handler }; @@ -10983,6 +11043,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11015,6 +11076,7 @@ struct charset_info_st my_charset_utf8mb4_icelandic_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11046,6 +11108,7 @@ struct charset_info_st my_charset_utf8mb4_latvian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11077,6 +11140,7 @@ struct charset_info_st my_charset_utf8mb4_romanian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11108,6 +11172,7 @@ struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11139,6 +11204,7 @@ struct charset_info_st my_charset_utf8mb4_polish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11170,6 +11236,7 @@ struct charset_info_st my_charset_utf8mb4_estonian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11201,6 +11268,7 @@ struct charset_info_st my_charset_utf8mb4_spanish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11232,6 +11300,7 @@ struct charset_info_st my_charset_utf8mb4_swedish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11263,6 +11332,7 @@ struct charset_info_st my_charset_utf8mb4_turkish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11294,6 +11364,7 @@ struct charset_info_st my_charset_utf8mb4_czech_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11326,6 +11397,7 @@ struct charset_info_st my_charset_utf8mb4_danish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11357,6 +11429,7 @@ struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11388,6 +11461,7 @@ struct charset_info_st my_charset_utf8mb4_slovak_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11419,6 +11493,7 @@ struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11450,6 +11525,7 @@ struct charset_info_st my_charset_utf8mb4_roman_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11481,6 +11557,7 @@ struct charset_info_st my_charset_utf8mb4_persian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11512,6 +11589,7 @@ struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11543,6 +11621,7 @@ struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11574,6 +11653,7 @@ struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11605,6 +11685,7 @@ struct charset_info_st my_charset_utf8mb4_german2_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11636,6 +11717,7 @@ struct charset_info_st my_charset_utf8mb4_croatian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_any_uca_handler }; @@ -11691,6 +11773,7 @@ struct charset_info_st my_charset_utf32_unicode_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -11723,6 +11806,7 @@ struct charset_info_st my_charset_utf32_icelandic_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -11754,6 +11838,7 @@ struct charset_info_st my_charset_utf32_latvian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -11785,6 +11870,7 @@ struct charset_info_st my_charset_utf32_romanian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -11816,6 +11902,7 @@ struct charset_info_st my_charset_utf32_slovenian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -11847,6 +11934,7 @@ struct charset_info_st my_charset_utf32_polish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -11878,6 +11966,7 @@ struct charset_info_st my_charset_utf32_estonian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -11909,6 +11998,7 @@ struct charset_info_st my_charset_utf32_spanish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -11940,6 +12030,7 @@ struct charset_info_st my_charset_utf32_swedish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -11971,6 +12062,7 @@ struct charset_info_st my_charset_utf32_turkish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12002,6 +12094,7 @@ struct charset_info_st my_charset_utf32_czech_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12034,6 +12127,7 @@ struct charset_info_st my_charset_utf32_danish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12065,6 +12159,7 @@ struct charset_info_st my_charset_utf32_lithuanian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12096,6 +12191,7 @@ struct charset_info_st my_charset_utf32_slovak_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12127,6 +12223,7 @@ struct charset_info_st my_charset_utf32_spanish2_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12158,6 +12255,7 @@ struct charset_info_st my_charset_utf32_roman_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12189,6 +12287,7 @@ struct charset_info_st my_charset_utf32_persian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12220,6 +12319,7 @@ struct charset_info_st my_charset_utf32_esperanto_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12251,6 +12351,7 @@ struct charset_info_st my_charset_utf32_hungarian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12282,6 +12383,7 @@ struct charset_info_st my_charset_utf32_sinhala_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12313,6 +12415,7 @@ struct charset_info_st my_charset_utf32_german2_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12344,6 +12447,7 @@ struct charset_info_st my_charset_utf32_croatian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_uca_handler }; @@ -12400,6 +12504,7 @@ struct charset_info_st my_charset_utf16_unicode_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12432,6 +12537,7 @@ struct charset_info_st my_charset_utf16_icelandic_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12463,6 +12569,7 @@ struct charset_info_st my_charset_utf16_latvian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12494,6 +12601,7 @@ struct charset_info_st my_charset_utf16_romanian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12525,6 +12633,7 @@ struct charset_info_st my_charset_utf16_slovenian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12556,6 +12665,7 @@ struct charset_info_st my_charset_utf16_polish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12587,6 +12697,7 @@ struct charset_info_st my_charset_utf16_estonian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12618,6 +12729,7 @@ struct charset_info_st my_charset_utf16_spanish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12649,6 +12761,7 @@ struct charset_info_st my_charset_utf16_swedish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12680,6 +12793,7 @@ struct charset_info_st my_charset_utf16_turkish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12711,6 +12825,7 @@ struct charset_info_st my_charset_utf16_czech_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12743,6 +12858,7 @@ struct charset_info_st my_charset_utf16_danish_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12774,6 +12890,7 @@ struct charset_info_st my_charset_utf16_lithuanian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12805,6 +12922,7 @@ struct charset_info_st my_charset_utf16_slovak_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12836,6 +12954,7 @@ struct charset_info_st my_charset_utf16_spanish2_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12867,6 +12986,7 @@ struct charset_info_st my_charset_utf16_roman_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12898,6 +13018,7 @@ struct charset_info_st my_charset_utf16_persian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12929,6 +13050,7 @@ struct charset_info_st my_charset_utf16_esperanto_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12960,6 +13082,7 @@ struct charset_info_st my_charset_utf16_hungarian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -12991,6 +13114,7 @@ struct charset_info_st my_charset_utf16_sinhala_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -13022,6 +13146,7 @@ struct charset_info_st my_charset_utf16_german2_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; @@ -13054,6 +13179,7 @@ struct charset_info_st my_charset_utf16_croatian_uca_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_uca_handler }; diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index a5845a26917..344039013f3 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1711,6 +1711,7 @@ struct charset_info_st my_charset_utf16_general_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_general_ci_handler }; @@ -1743,6 +1744,7 @@ struct charset_info_st my_charset_utf16_bin= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16_handler, &my_collation_utf16_bin_handler }; @@ -1878,6 +1880,7 @@ struct charset_info_st my_charset_utf16le_general_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16le_handler, &my_collation_utf16_general_ci_handler }; @@ -1910,6 +1913,7 @@ struct charset_info_st my_charset_utf16le_bin= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf16le_handler, &my_collation_utf16_bin_handler }; @@ -2802,6 +2806,7 @@ struct charset_info_st my_charset_utf32_general_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_general_ci_handler }; @@ -2834,6 +2839,7 @@ struct charset_info_st my_charset_utf32_bin= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf32_handler, &my_collation_utf32_bin_handler }; @@ -3419,6 +3425,7 @@ struct charset_info_st my_charset_ucs2_general_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_general_ci_handler }; @@ -3451,6 +3458,7 @@ struct charset_info_st my_charset_ucs2_general_mysql500_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_general_ci_handler }; @@ -3483,6 +3491,7 @@ struct charset_info_st my_charset_ucs2_bin= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_ucs2_handler, &my_collation_ucs2_bin_handler }; diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index 0f405825830..9a69995b278 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -67258,7 +67258,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = NULL, /* init */ my_strnncoll_simple,/* strnncoll */ my_strnncollsp_simple, - my_strnxfrm_simple, /* strnxfrm */ + my_strnxfrm_mb, /* strnxfrm */ my_strnxfrmlen_simple, my_like_range_mb, /* like_range */ my_wildcmp_mb, /* wildcmp */ @@ -67328,6 +67328,7 @@ struct charset_info_st my_charset_ujis_japanese_ci= 0xFEFE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_ci_handler }; @@ -67360,6 +67361,7 @@ struct charset_info_st my_charset_ujis_bin= 0xFEFE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index ae891b43d37..207eaffb1a3 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2083,7 +2083,7 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs, str+= scan; result= my_wildcmp_unicode_impl(cs, str, str_end, wildstr, wildend, escape, w_one, w_many, - weights, recurse_level+1); + weights, recurse_level + 1); if (result <= 0) return result; } @@ -2104,6 +2104,71 @@ my_wildcmp_unicode(CHARSET_INFO *cs, wildstr, wildend, escape, w_one, w_many, weights, 1); } + + +/** + Pad buffer with weights for space characters. + + @details + This functions fills the buffer pointed by "str" + with weights of space character. Not more than + "nweights" weights are put. If at some iteration + step only a half of weight can fit + (which is possible if buffer length is an odd number) + then a half of this weight is put - this gives + a little bit better ORDER BY result for long strings. + + @str Buffer + @strend End of buffer + @nweights Number of weights + + @return Result length +*/ + +static size_t +my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights) +{ + uchar *str0; + DBUG_ASSERT(str && str <= strend); + for (str0= str; str < strend && nweights; nweights--) + { + *str++= 0x00; + if (str < strend) + *str++= 0x20; + } + return str - str0; +} + + +/** + Pad buffer with weights for space characters. + + @details + This functions fills the buffer pointed by "str" + with weights of space character. Putting half of weight + (when buffer length is an odd number) is OK. + + @str Buffer + @strend End of buffer + + @return Result length +*/ + +static size_t +my_strxfrm_pad_unicode(uchar *str, uchar *strend) +{ + uchar *str0= str; + DBUG_ASSERT(str && str <= strend); + for ( ; str < strend ; ) + { + *str++= 0x00; + if (str < strend) + *str++= 0x20; + } + return str - str0; +} + + /* Store sorting weights using 2 bytes per character. @@ -2115,23 +2180,24 @@ my_wildcmp_unicode(CHARSET_INFO *cs, */ size_t my_strnxfrm_unicode(CHARSET_INFO *cs, - uchar *dst, size_t dstlen, - const uchar *src, size_t srclen) + uchar *dst, size_t dstlen, uint nweights, + const uchar *src, size_t srclen, uint flags) { - my_wc_t UNINIT_VAR(wc); + my_wc_t wc; int res; + uchar *dst0= dst; uchar *de= dst + dstlen; - uchar *de_beg= de - 1; - const uchar *se = src + srclen; + const uchar *se= src + srclen; MY_UNICASE_INFO *uni_plane= (cs->state & MY_CS_BINSORT) ? - NULL : cs->caseinfo; + NULL : cs->caseinfo; + LINT_INIT(wc); DBUG_ASSERT(src); - - while (dst < de_beg) + + for (; dst < de && nweights; nweights--) { - if ((res= cs->cset->mb_wc(cs,&wc, src, se)) <= 0) + if ((res= cs->cset->mb_wc(cs, &wc, src, se)) <= 0) break; - src+=res; + src+= res; if (uni_plane) my_tosort_unicode(uni_plane, &wc, cs->state); @@ -2140,17 +2206,15 @@ my_strnxfrm_unicode(CHARSET_INFO *cs, if (dst < de) *dst++= (uchar) (wc & 0xFF); } - - while (dst < de_beg) /* Fill the tail with keys for space character */ - { - *dst++= 0x00; - *dst++= 0x20; - } - - if (dst < de) /* Clear the last byte, if "dstlen" was an odd number */ - *dst= 0x00; - - return dstlen; + + if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE)) + dst+= my_strxfrm_pad_nweights_unicode(dst, de, nweights); + + my_strxfrm_desc_and_reverse(dst0, dst, flags, 0); + + if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de) + dst+= my_strxfrm_pad_unicode(dst, de); + return dst - dst0; } @@ -2160,45 +2224,63 @@ my_strnxfrm_unicode(CHARSET_INFO *cs, */ size_t my_strnxfrm_unicode_full_bin(CHARSET_INFO *cs, - uchar *dst, size_t dstlen, - const uchar *src, size_t srclen) + uchar *dst, size_t dstlen, uint nweights, + const uchar *src, size_t srclen, uint flags) { my_wc_t wc; + uchar *dst0= dst; uchar *de= dst + dstlen; - uchar *de_beg= de - 2; /* The beginning of the last chunk */ const uchar *se = src + srclen; LINT_INIT(wc); DBUG_ASSERT(src); DBUG_ASSERT(cs->state & MY_CS_BINSORT); - while (dst < de_beg) + for ( ; dst < de && nweights; nweights--) { int res; if ((res= cs->cset->mb_wc(cs, &wc, src, se)) <= 0) break; src+= res; *dst++= (uchar) (wc >> 16); - *dst++= (uchar) ((wc >> 8) & 0xFF); - *dst++= (uchar) (wc & 0xFF); + if (dst < de) + { + *dst++= (uchar) ((wc >> 8) & 0xFF); + if (dst < de) + *dst++= (uchar) (wc & 0xFF); + } } - while (dst < de_beg) /* Fill the tail with keys for space character */ + if (flags & MY_STRXFRM_PAD_WITH_SPACE) { - *dst++= 0x00; - *dst++= 0x00; - *dst++= 0x20; + for ( ; dst < de && nweights; nweights--) + { + *dst++= 0x00; + if (dst < de) + { + *dst++= 0x00; + if (dst < de) + *dst++= 0x20; + } + } } + + my_strxfrm_desc_and_reverse(dst0, dst, flags, 0); - /* Clear the last one or two bytes, if "dstlen" was not divisible by 3 */ - if (dst < de) + if (flags & MY_STRXFRM_PAD_TO_MAXLEN) { - *dst++= 0x00; - if (dst < de) - *dst= 0x00; + while (dst < de) + { + *dst++= 0x00; + if (dst < de) + { + *dst++= 0x00; + if (dst < de) + *dst++= 0x20; + } + } } - - return dstlen; + return dst - dst0; } @@ -2944,7 +3026,7 @@ static uint my_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)), } -static MY_COLLATION_HANDLER my_collation_ci_handler = +static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler = { NULL, /* init */ my_strnncoll_utf8, @@ -2959,6 +3041,22 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_propagate_complex }; + +static MY_COLLATION_HANDLER my_collation_utf8_bin_handler = +{ + NULL, /* init */ + my_strnncoll_mb_bin, + my_strnncollsp_mb_bin, + my_strnxfrm_unicode, + my_strnxfrmlen_utf8, + my_like_range_mb, + my_wildcmp_mb_bin, + my_strcasecmp_mb_bin, + my_instr_mb, + my_hash_sort_mb_bin, + my_propagate_simple +}; + MY_CHARSET_HANDLER my_charset_utf8_handler= { NULL, /* init */ @@ -3019,8 +3117,9 @@ struct charset_info_st my_charset_utf8_general_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, - &my_collation_ci_handler + &my_collation_utf8_general_ci_handler }; @@ -3051,8 +3150,9 @@ struct charset_info_st my_charset_utf8_general_mysql500_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, - &my_collation_ci_handler + &my_collation_utf8_general_ci_handler }; @@ -3083,8 +3183,9 @@ struct charset_info_st my_charset_utf8_bin= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, - &my_collation_mb_bin_handler + &my_collation_utf8_bin_handler }; #ifdef HAVE_UTF8_GENERAL_CS @@ -3111,7 +3212,6 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, while ( s < se && t < te ) { - int plane; s_res=my_utf8_uni(cs,&s_wc, s, se); t_res=my_utf8_uni(cs,&t_wc, t, te); @@ -3126,10 +3226,10 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, { save_diff = ((int)s_wc) - ((int)t_wc); } - plane=(s_wc>>8) & 0xFF; - s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; - plane=(t_wc>>8) & 0xFF; - t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc; + + my_tosort_unicode(uni_plane, &s_wc, cs->state); + my_tosort_unicode(uni_plane, &t_wc, cs->state); + if ( s_wc != t_wc ) { return ((int) s_wc) - ((int) t_wc); @@ -3249,11 +3349,10 @@ struct charset_info_st my_charset_utf8_general_cs= to_lower_utf8, /* to_lower */ to_upper_utf8, /* to_upper */ to_upper_utf8, /* sort_order */ - NULL, /* contractions */ - NULL, /* sort_order_big*/ + NULL, /* uca */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - my_unicase_default, /* caseinfo */ + &my_unicase_default,/* caseinfo */ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ @@ -3265,6 +3364,7 @@ struct charset_info_st my_charset_utf8_general_cs= 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8_handler, &my_collation_cs_handler }; @@ -4570,6 +4670,7 @@ struct charset_info_st my_charset_filename= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_filename_handler, &my_collation_filename_handler }; @@ -5453,6 +5554,7 @@ struct charset_info_st my_charset_utf8mb4_general_ci= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_utf8mb4_general_ci_handler }; @@ -5485,6 +5587,7 @@ struct charset_info_st my_charset_utf8mb4_bin= 0xFFFF, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 1, /* levels_for_order */ &my_charset_utf8mb4_handler, &my_collation_utf8mb4_bin_handler }; diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index d1cd51a5d8d..a688373f48e 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -489,9 +489,11 @@ int my_strnncollsp_win1250ch(CHARSET_INFO * cs, } -static size_t my_strnxfrm_win1250ch(CHARSET_INFO * cs __attribute__((unused)), - uchar *dest, size_t len, - const uchar *src, size_t srclen) +static size_t +my_strnxfrm_win1250ch(CHARSET_INFO *cs __attribute__((unused)), + uchar *dest, size_t len, + uint nweights_arg __attribute__((unused)), + const uchar *src, size_t srclen, uint flags) { int value; const uchar *p; @@ -499,15 +501,23 @@ static size_t my_strnxfrm_win1250ch(CHARSET_INFO * cs __attribute__((unused)), size_t totlen = 0; p = src; - do { + if (!(flags & 0x0F)) /* All levels by default */ + flags|= 0x0F; + + for (;;) + { NEXT_CMP_VALUE(src, p, pass, value, (int)srclen); - if (totlen <= len) - dest[totlen] = value; - totlen++; - } while (value) ; - if (len > totlen) - bfill(dest + totlen, len - totlen, ' '); - return len; + if (!value) + break; + if (totlen <= len && ((1 << pass) & flags)) + dest[totlen++] = value; + } + if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && len > totlen) + { + memset(dest + totlen, 0x00, len - totlen); + totlen= len; + } + return totlen; } #undef IS_END @@ -705,6 +715,7 @@ struct charset_info_st my_charset_cp1250_czech_ci = 0, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ + 2, /* levels_for_order */ &my_charset_8bit_handler, &my_collation_czech_ci_handler }; |