diff options
author | Alexander Barkov <bar@mariadb.org> | 2015-07-06 15:50:56 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2015-07-06 15:50:56 +0400 |
commit | b2e324a21fbf481bccd15f9ee4bab819afefbed6 (patch) | |
tree | 2b005ed9e20ed4ed1abb22314046e144f1c805d6 /strings | |
parent | 35d8ac350d97557d06edd1cffe7ecc28fc68930a (diff) | |
download | mariadb-git-b2e324a21fbf481bccd15f9ee4bab819afefbed6.tar.gz |
MDEV-8416 ucs2: compare broken bytes as "greater than any non-broken character"
MDEV-8418 utf16: compare broken bytes as "greater than any non-broken character"
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-ucs2.c | 560 | ||||
-rw-r--r-- | strings/strcoll.ic | 19 |
2 files changed, 125 insertions, 454 deletions
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index d1441a4d3a5..90aa1a93bed 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1186,11 +1186,14 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)), #define MY_UTF16_SURROGATE_LOW_FIRST 0xDC00 #define MY_UTF16_SURROGATE_LOW_LAST 0xDFFF -#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8) -#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC) -#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800) +#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8) +#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC) +/* Test if a byte is a leading byte of a high or low surrogate head: */ +#define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8) +/* Test if a Unicode code point is a high or low surrogate head */ +#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800) -#define MY_UTF16_WC2(a, b) ((a << 8) + b) +#define MY_UTF16_WC2(a, b) ((a << 8) + b) /* a= 110110?? (<< 18) @@ -1201,6 +1204,30 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)), #define MY_UTF16_WC4(a, b, c, d) (((a & 3) << 18) + (b << 10) + \ ((c & 3) << 8) + d + 0x10000) +#define IS_MB2_CHAR(b0,b1) (!MY_UTF16_SURROGATE_HEAD(b0)) +#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b0) && MY_UTF16_LOW_HEAD(b2)) + +static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1) +{ + my_wc_t wc= MY_UTF16_WC2(b0, b1); + MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8]; + return (int) (page ? page[wc & 0xFF].sort : wc); +} +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_ci +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3)) +#include "strcoll.ic" + +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b0, b1)) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3)) +#include "strcoll.ic" + +#undef IS_MB2_CHAR +#undef IS_MB4_CHAR + static int my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)), my_wc_t *pwc, const uchar *s, const uchar *e) @@ -1371,146 +1398,6 @@ my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen, } -static int -my_strnncoll_utf16(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) -{ - int s_res, t_res; - my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc); - my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - const uchar *se= s + slen; - const uchar *te= t + tlen; - MY_UNICASE_INFO *uni_plane= cs->caseinfo; - - while (s < se && t < te) - { - s_res= mb_wc(cs, &s_wc, s, se); - t_res= mb_wc(cs, &t_wc, t, te); - - if (s_res <= 0 || t_res <= 0) - { - /* Incorrect string, compare by char value */ - return my_bincmp(s, se, t, te); - } - - my_tosort_utf16(uni_plane, &s_wc); - my_tosort_utf16(uni_plane, &t_wc); - - if (s_wc != t_wc) - { - return s_wc > t_wc ? 1 : -1; - } - - s+= s_res; - t+= t_res; - } - return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t))); -} - - -/** - Compare strings, discarding end space - - If one string is shorter as the other, then we space extend the other - so that the strings have equal length. - - This will ensure that the following things hold: - - "a" == "a " - "a\0" < "a" - "a\0" < "a " - - @param cs Character set pinter. - @param a First string to compare. - @param a_length Length of 'a'. - @param b Second string to compare. - @param b_length Length of 'b'. - - IMPLEMENTATION - - @return Comparison result. - @retval Negative number, if a less than b. - @retval 0, if a is equal to b - @retval Positive number, if a > b -*/ - -static int -my_strnncollsp_utf16(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference) -{ - int res; - my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc); - my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - const uchar *se= s + slen, *te= t + tlen; - MY_UNICASE_INFO *uni_plane= cs->caseinfo; - - DBUG_ASSERT((slen % 2) == 0); - DBUG_ASSERT((tlen % 2) == 0); - -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= FALSE; -#endif - - while (s < se && t < te) - { - int s_res= mb_wc(cs, &s_wc, s, se); - int t_res= mb_wc(cs, &t_wc, t, te); - - if (s_res <= 0 || t_res <= 0) - { - /* Incorrect string, compare bytewise */ - return my_bincmp(s, se, t, te); - } - - my_tosort_utf16(uni_plane, &s_wc); - my_tosort_utf16(uni_plane, &t_wc); - - if (s_wc != t_wc) - { - return s_wc > t_wc ? 1 : -1; - } - - s+= s_res; - t+= t_res; - } - - slen= (size_t) (se - s); - tlen= (size_t) (te - t); - res= 0; - - if (slen != tlen) - { - int s_res, swap= 1; - if (diff_if_only_endspace_difference) - res= 1; /* Assume 's' is bigger */ - if (slen < tlen) - { - slen= tlen; - s= t; - se= te; - swap= -1; - res= -res; - } - - for ( ; s < se; s+= s_res) - { - if ((s_res= mb_wc(cs, &s_wc, s, se)) < 0) - { - DBUG_ASSERT(0); - return 0; - } - if (s_wc != ' ') - return (s_wc < ' ') ? -swap : swap; - } - } - return res; -} - - static uint my_ismbchar_utf16(CHARSET_INFO *cs, const char *b, const char *e) { @@ -1623,111 +1510,6 @@ my_wildcmp_utf16_bin(CHARSET_INFO *cs, } -static int -my_strnncoll_utf16_bin(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) -{ - int s_res,t_res; - my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc); - my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - const uchar *se=s+slen; - const uchar *te=t+tlen; - - while ( s < se && t < te ) - { - s_res= mb_wc(cs, &s_wc, s, se); - t_res= mb_wc(cs, &t_wc, t, te); - - if (s_res <= 0 || t_res <= 0) - { - /* Incorrect string, compare by char value */ - return my_bincmp(s, se, t, te); - } - if (s_wc != t_wc) - { - return s_wc > t_wc ? 1 : -1; - } - - s+= s_res; - t+= t_res; - } - return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t))); -} - - -static int -my_strnncollsp_utf16_bin(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference) -{ - int res; - my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc); - my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - const uchar *se= s + slen, *te= t + tlen; - - DBUG_ASSERT((slen % 2) == 0); - DBUG_ASSERT((tlen % 2) == 0); - -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= FALSE; -#endif - - while (s < se && t < te) - { - int s_res= mb_wc(cs, &s_wc, s, se); - int t_res= mb_wc(cs, &t_wc, t, te); - - if (s_res <= 0 || t_res <= 0) - { - /* Incorrect string, compare bytewise */ - return my_bincmp(s, se, t, te); - } - - if (s_wc != t_wc) - { - return s_wc > t_wc ? 1 : -1; - } - - s+= s_res; - t+= t_res; - } - - slen= (size_t) (se - s); - tlen= (size_t) (te - t); - res= 0; - - if (slen != tlen) - { - int s_res, swap= 1; - if (diff_if_only_endspace_difference) - res= 1; /* Assume 's' is bigger */ - if (slen < tlen) - { - slen= tlen; - s= t; - se= te; - swap= -1; - res= -res; - } - - for ( ; s < se; s+= s_res) - { - if ((s_res= mb_wc(cs, &s_wc, s, se)) < 0) - { - DBUG_ASSERT(0); - return 0; - } - if (s_wc != ' ') - return (s_wc < ' ') ? -swap : swap; - } - } - return res; -} - - static void my_hash_sort_utf16_bin(CHARSET_INFO *cs, const uchar *pos, size_t len, ulong *nr1, ulong *nr2) @@ -1747,8 +1529,8 @@ my_hash_sort_utf16_bin(CHARSET_INFO *cs, static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler = { NULL, /* init */ - my_strnncoll_utf16, - my_strnncollsp_utf16, + my_strnncoll_utf16_general_ci, + my_strnncollsp_utf16_general_ci, my_strnxfrm_unicode, my_strnxfrmlen_unicode, my_like_range_generic, @@ -1877,6 +1659,24 @@ struct charset_info_st my_charset_utf16_bin= }; +#define IS_MB2_CHAR(b0,b1) (!MY_UTF16_SURROGATE_HEAD(b1)) +#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b1) && MY_UTF16_LOW_HEAD(b3)) + +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_ci +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2)) +#include "strcoll.ic" + +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b1, b0)) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2)) +#include "strcoll.ic" + +#undef IS_MB2_CHAR +#undef IS_MB4_CHAR + static int my_utf16le_uni(CHARSET_INFO *cs __attribute__((unused)), my_wc_t *pwc, const uchar *s, const uchar *e) @@ -1948,6 +1748,38 @@ my_lengthsp_utf16le(CHARSET_INFO *cs __attribute__((unused)), } +static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler = +{ + NULL, /* init */ + my_strnncoll_utf16le_general_ci, + my_strnncollsp_utf16le_general_ci, + my_strnxfrm_unicode, + my_strnxfrmlen_unicode, + my_like_range_generic, + my_wildcmp_utf16_ci, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16, + my_propagate_simple +}; + + +static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler = +{ + NULL, /* init */ + my_strnncoll_utf16le_bin, + my_strnncollsp_utf16le_bin, + my_strnxfrm_unicode_full_bin, + my_strnxfrmlen_unicode_full_bin, + my_like_range_generic, + my_wildcmp_utf16_bin, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16_bin, + my_propagate_simple +}; + + static MY_CHARSET_HANDLER my_charset_utf16le_handler= { NULL, /* init */ @@ -2012,7 +1844,7 @@ struct charset_info_st my_charset_utf16le_general_ci= 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ &my_charset_utf16le_handler, - &my_collation_utf16_general_ci_handler + &my_collation_utf16le_general_ci_handler }; @@ -2045,7 +1877,7 @@ struct charset_info_st my_charset_utf16le_bin= 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ &my_charset_utf16le_handler, - &my_collation_utf16_bin_handler + &my_collation_utf16le_bin_handler }; @@ -3058,6 +2890,31 @@ static const uchar to_upper_ucs2[] = { }; +/* Definitions for strcoll.ic */ +#define IS_MB2_CHAR(x,y) (1) +#define UCS2_CODE(b0,b1) (((uchar) b0) << 8 | ((uchar) b1)) + + +static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1) +{ + my_wc_t wc= UCS2_CODE(b0, b1); + MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8]; + return (int) (page ? page[wc & 0xFF].sort : wc); +} + + +#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_ci +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1) +#include "strcoll.ic" + + +#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1) +#include "strcoll.ic" + + static int my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)), const uchar *s, const uchar *e) @@ -3208,120 +3065,6 @@ my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)), } -static int my_strnncoll_ucs2(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) -{ - int s_res,t_res; - my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc); - const uchar *se=s+slen; - const uchar *te=t+tlen; - MY_UNICASE_INFO *uni_plane= cs->caseinfo; - - while ( s < se && t < te ) - { - s_res=my_ucs2_uni(cs,&s_wc, s, se); - t_res=my_ucs2_uni(cs,&t_wc, t, te); - - if ( s_res <= 0 || t_res <= 0 ) - { - /* Incorrect string, compare by char value */ - return ((int)s[0]-(int)t[0]); - } - - my_tosort_ucs2(uni_plane, &s_wc); - my_tosort_ucs2(uni_plane, &t_wc); - - if ( s_wc != t_wc ) - { - return s_wc > t_wc ? 1 : -1; - } - - s+=s_res; - t+=t_res; - } - return (int) (t_is_prefix ? t-te : ((se-s) - (te-t))); -} - -/* - Compare strings, discarding end space - - SYNOPSIS - my_strnncollsp_ucs2() - cs character set handler - a First string to compare - a_length Length of 'a' - b Second string to compare - b_length Length of 'b' - - IMPLEMENTATION - If one string is shorter as the other, then we space extend the other - so that the strings have equal length. - - This will ensure that the following things hold: - - "a" == "a " - "a\0" < "a" - "a\0" < "a " - - RETURN - < 0 a < b - = 0 a == b - > 0 a > b -*/ - -static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference - __attribute__((unused))) -{ - const uchar *se, *te; - size_t minlen; - MY_UNICASE_INFO *uni_plane= cs->caseinfo; - - /* extra safety to make sure the lengths are even numbers */ - slen&= ~1; - tlen&= ~1; - - se= s + slen; - te= t + tlen; - - for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2) - { - int s_wc = uni_plane->page[s[0]] ? (int) uni_plane->page[s[0]][s[1]].sort : - (((int) s[0]) << 8) + (int) s[1]; - - int t_wc = uni_plane->page[t[0]] ? (int) uni_plane->page[t[0]][t[1]].sort : - (((int) t[0]) << 8) + (int) t[1]; - if ( s_wc != t_wc ) - return s_wc > t_wc ? 1 : -1; - - s+= 2; - t+= 2; - } - - if (slen != tlen) - { - int swap= 1; - if (slen < tlen) - { - s= t; - se= te; - swap= -1; - } - - for ( ; s < se ; s+= 2) - { - if (s[0] || s[1] != ' ') - return (s[0] == 0 && s[1] < ' ') ? -swap : swap; - } - } - return 0; -} - - static uint my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), const char *b, const char *e) @@ -3418,85 +3161,6 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs, static -int my_strnncoll_ucs2_bin(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) -{ - int s_res,t_res; - my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc); - const uchar *se=s+slen; - const uchar *te=t+tlen; - - while ( s < se && t < te ) - { - s_res=my_ucs2_uni(cs,&s_wc, s, se); - t_res=my_ucs2_uni(cs,&t_wc, t, te); - - if ( s_res <= 0 || t_res <= 0 ) - { - /* Incorrect string, compare by char value */ - return ((int)s[0]-(int)t[0]); - } - if ( s_wc != t_wc ) - { - return s_wc > t_wc ? 1 : -1; - } - - s+=s_res; - t+=t_res; - } - return (int) (t_is_prefix ? t-te : ((se-s) - (te-t))); -} - -static int my_strnncollsp_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference - __attribute__((unused))) -{ - const uchar *se, *te; - size_t minlen; - - /* extra safety to make sure the lengths are even numbers */ - slen= (slen >> 1) << 1; - tlen= (tlen >> 1) << 1; - - se= s + slen; - te= t + tlen; - - for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2) - { - int s_wc= s[0] * 256 + s[1]; - int t_wc= t[0] * 256 + t[1]; - if ( s_wc != t_wc ) - return s_wc > t_wc ? 1 : -1; - - s+= 2; - t+= 2; - } - - if (slen != tlen) - { - int swap= 1; - if (slen < tlen) - { - s= t; - se= te; - swap= -1; - } - - for ( ; s < se ; s+= 2) - { - if (s[0] || s[1] != ' ') - return (s[0] == 0 && s[1] < ' ') ? -swap : swap; - } - } - return 0; -} - - -static void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), const uchar *key, size_t len,ulong *nr1, ulong *nr2) { @@ -3518,8 +3182,8 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = { NULL, /* init */ - my_strnncoll_ucs2, - my_strnncollsp_ucs2, + my_strnncoll_ucs2_general_ci, + my_strnncollsp_ucs2_general_ci, my_strnxfrm_unicode, my_strnxfrmlen_unicode, my_like_range_generic, diff --git a/strings/strcoll.ic b/strings/strcoll.ic index 5f4ee615d84..4bced593a23 100644 --- a/strings/strcoll.ic +++ b/strings/strcoll.ic @@ -64,13 +64,16 @@ @return - the number of bytes scanned The including source file must define the following macros: - IS_MB1_CHAR(x) - IS_MB1_MB2HEAD_GAP(x) - optional, for better performance - IS_MB2_CHAR(x,y) - IS_MB3_CHAR(x,y,z) - for character sets with mbmaxlen>2 + IS_MB1_CHAR(b0) - for character sets that have MB1 characters + IS_MB1_MB2HEAD_GAP(b0) - optional, for better performance + IS_MB2_CHAR(b0,b1) - for character sets that have MB2 characters + IS_MB3_CHAR(b0,b1,b2) - for character sets that have MB3 characters + IS_MB4_CHAR(b0,b1,b2,b3) - for character sets with have MB4 characters WEIGHT_PAD_SPACE - WEIGHT_MB1(x) - WEIGHT_MB2(x,y) + WEIGHT_MB1(b0) - for character sets that have MB1 characters + WEIGHT_MB2(b0,b1) - for character sets that have MB2 characters + WEIGHT_MB3(b0,b1,b2) - for character sets that have MB3 characters + WEIGHT_MB4(b0,b1,b2,b3) - for character sets that have MB4 characters WEIGHT_ILSEQ(x) */ static inline uint @@ -82,11 +85,13 @@ MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end) return 0; } +#ifdef IS_MB1_CHAR if (IS_MB1_CHAR(*str)) { *weight= WEIGHT_MB1(*str); /* A valid single byte character*/ return 1; } +#endif #ifdef IS_MB1_MBHEAD_UNUSED_GAP /* @@ -98,6 +103,7 @@ MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end) goto bad; #endif +#ifdef IS_MB2_CHAR if (str + 2 > end) /* The string ended unexpectedly */ goto bad; /* Treat as a bad byte */ @@ -106,6 +112,7 @@ MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end) *weight= WEIGHT_MB2(str[0], str[1]); return 2; /* A valid two-byte character */ } +#endif #ifdef IS_MB3_CHAR if (str + 3 > end) /* Incomplete three-byte character */ |