diff options
author | Alexander Barkov <bar@mariadb.org> | 2015-08-14 18:34:41 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2015-08-14 18:34:41 +0400 |
commit | 78b80cb6baa3f9c4e58355888e8c6defab186440 (patch) | |
tree | 70eb8bf0de64d49f7bb27cd89b54070ce9564588 /strings | |
parent | bfb6ea02321f963c2b5f5beb333283585d3d1be3 (diff) | |
download | mariadb-git-78b80cb6baa3f9c4e58355888e8c6defab186440.tar.gz |
Adding MY_CHARSET_HANDLER::native_to_mb().
This is a pre-requisite patch for:
- MDEV-8433 Make field<'broken-string' use indexes
- MDEV-8625 Bad result set with ignorable characters when using a prefix key
- MDEV-8626 Bad result set with contractions when using a prefix key
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-big5.c | 1 | ||||
-rw-r--r-- | strings/ctype-bin.c | 7 | ||||
-rw-r--r-- | strings/ctype-cp932.c | 1 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 1 | ||||
-rw-r--r-- | strings/ctype-eucjpms.c | 1 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 1 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 1 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 1 | ||||
-rw-r--r-- | strings/ctype-mb.c | 21 | ||||
-rw-r--r-- | strings/ctype-mb.ic | 64 | ||||
-rw-r--r-- | strings/ctype-simple.c | 1 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 1 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 1 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 4 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 1 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 3 |
16 files changed, 87 insertions, 23 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index 925398a4d82..d6a9695afbf 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6847,6 +6847,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler= my_charlen_big5, my_well_formed_char_length_big5, my_copy_fix_mb, + my_native_to_mb_big5, }; struct charset_info_st my_charset_big5_chinese_ci= diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 95f31038ee6..4d42973f69f 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -256,10 +256,8 @@ static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)), } -static int my_wc_mb_bin(CHARSET_INFO *cs __attribute__((unused)), - my_wc_t wc, - uchar *s, - uchar *e __attribute__((unused))) +int my_wc_mb_bin(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *s, uchar *e) { if (s >= e) return MY_CS_TOOSMALL; @@ -552,6 +550,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_charlen_8bit, my_well_formed_char_length_8bit, my_copy_8bit, + my_wc_mb_bin, }; diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c index 7a4abfa39d1..9bf206f1de7 100644 --- a/strings/ctype-cp932.c +++ b/strings/ctype-cp932.c @@ -34722,6 +34722,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_charlen_cp932, my_well_formed_char_length_cp932, my_copy_fix_mb, + my_native_to_mb_cp932, }; diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index f4d4b445bb2..1f13ab66284 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -10016,6 +10016,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_charlen_euckr, my_well_formed_char_length_euckr, my_copy_fix_mb, + my_native_to_mb_euckr, }; diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c index d331f643079..82c4bb5a4e8 100644 --- a/strings/ctype-eucjpms.c +++ b/strings/ctype-eucjpms.c @@ -67549,6 +67549,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_charlen_eucjpms, my_well_formed_char_length_eucjpms, my_copy_fix_mb, + my_native_to_mb_eucjpms, }; diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index e986584d356..b0e275fe93d 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -6420,6 +6420,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_charlen_gb2312, my_well_formed_char_length_gb2312, my_copy_fix_mb, + my_native_to_mb_gb2312, }; diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 2d4dbaf202a..37b003f1899 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -10732,6 +10732,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_charlen_gbk, my_well_formed_char_length_gbk, my_copy_fix_mb, + my_native_to_mb_gbk, }; diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index bc51911dceb..8bc3ac3365d 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -425,6 +425,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_charlen_8bit, my_well_formed_char_length_8bit, my_copy_8bit, + my_wc_mb_bin, /* native_to_mb */ }; diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index a7f9e144fe8..eef283d2925 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -811,25 +811,8 @@ my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), static void pad_max_char(CHARSET_INFO *cs, char *str, char *end) { char buf[10]; - char buflen; - - if (!(cs->state & MY_CS_UNICODE)) - { - if (cs->max_sort_char <= 255) - { - bfill(str, end - str, cs->max_sort_char); - return; - } - buf[0]= cs->max_sort_char >> 8; - buf[1]= cs->max_sort_char & 0xFF; - buflen= 2; - } - else - { - buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, - (uchar*) buf + sizeof(buf)); - } - + char buflen= cs->cset->native_to_mb(cs, cs->max_sort_char, (uchar*) buf, + (uchar*) buf + sizeof(buf)); DBUG_ASSERT(buflen > 0); do { diff --git a/strings/ctype-mb.ic b/strings/ctype-mb.ic index 0a9c47090fe..0ad945b685d 100644 --- a/strings/ctype-mb.ic +++ b/strings/ctype-mb.ic @@ -33,6 +33,7 @@ #define DEFINE_WELL_FORMED_LEN #define DEFINE_WELL_FORMED_CHAR_LENGTH #define DEFINE_CHARLEN +#define DEFINE_NATIVE_TO_MB_VARLEN #endif @@ -257,4 +258,67 @@ MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused } #endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */ + +#ifdef DEFINE_NATIVE_TO_MB_VARLEN +/* + Write a native 2-byte character. + If the full character does not fit, only the first byte is written. +*/ +static inline int +my_native_to_mb_fixed2(my_wc_t wc, uchar *s, uchar *e) +{ + /* The caller must insure there is a space for at least one byte */ + DBUG_ASSERT(s < e); + s[0]= wc >> 8; + if (s + 2 > e) + return MY_CS_TOOSMALL2; + s[1]= wc & 0xFF; + return 2; +} + + +/* + Write a native 3-byte character. + If the full character does not fit, only the leading bytes are written. +*/ +static inline int +my_native_to_mb_fixed3(my_wc_t wc, uchar *s, uchar *e) +{ + /* The caller must insure there is a space for at least one byte */ + DBUG_ASSERT(s < e); + s[0]= wc >> 16; + if (s + 2 > e) + return MY_CS_TOOSMALL2; + s[1]= (wc >> 8) & 0xFF; + if (s + 3 > e) + return MY_CS_TOOSMALL3; + s[2]= wc & 0xFF; + return 3; +} + + +/* + Write a native 1-byte or 2-byte or 3-byte character. +*/ + +static int +MY_FUNCTION_NAME(native_to_mb)(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *s, uchar *e) +{ + if (s >= e) + return MY_CS_TOOSMALL; + if ((int) wc <= 0xFF) + { + s[0]= (uchar) wc; + return 1; + } +#ifdef IS_MB3_HEAD + if (wc > 0xFFFF) + return my_native_to_mb_fixed3(wc, s, e); +#endif + return my_native_to_mb_fixed2(wc, s, e); +} +#endif /* DEFINE_NATIVE_TO_MB_VARLEN */ + + #undef MY_FUNCTION_NAME diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 394924c8209..020bfd0035a 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -1950,6 +1950,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler= my_charlen_8bit, my_well_formed_char_length_8bit, my_copy_8bit, + my_wc_mb_bin, /* native_to_mb */ }; MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler = diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 57e674f47a6..629e1cd8309 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -34101,6 +34101,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_charlen_sjis, my_well_formed_char_length_sjis, my_copy_fix_mb, + my_native_to_mb_sjis, }; diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 6537b380ab3..c62e58846bb 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -889,6 +889,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_charlen_8bit, my_well_formed_char_length_8bit, my_copy_8bit, + my_wc_mb_bin, /* native_to_mb */ }; diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 02adc1492c4..e7ba5cbc3c3 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1590,6 +1590,7 @@ MY_CHARSET_HANDLER my_charset_utf16_handler= my_charlen_utf16, my_well_formed_char_length_utf16, my_copy_fix_mb2_or_mb4, + my_uni_utf16, }; @@ -1812,6 +1813,7 @@ static MY_CHARSET_HANDLER my_charset_utf16le_handler= my_charlen_utf16, my_well_formed_char_length_utf16, my_copy_fix_mb2_or_mb4, + my_uni_utf16le, }; @@ -2556,6 +2558,7 @@ MY_CHARSET_HANDLER my_charset_utf32_handler= my_charlen_utf32, my_well_formed_char_length_utf32, my_copy_fix_mb2_or_mb4, + my_uni_utf32, }; @@ -3042,6 +3045,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler= my_charlen_ucs2, my_well_formed_char_length_ucs2, my_copy_fix_mb2_or_mb4, + my_uni_ucs2, }; diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index 497ad67da05..308f5f0f7d1 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -67293,6 +67293,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_charlen_ujis, my_well_formed_char_length_ujis, my_copy_fix_mb, + my_native_to_mb_ujis, }; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 259928130b9..3c2c812a004 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -5526,6 +5526,7 @@ MY_CHARSET_HANDLER my_charset_utf8_handler= my_charlen_utf8, my_well_formed_char_length_utf8, my_copy_fix_mb, + my_uni_utf8, }; @@ -7109,6 +7110,7 @@ static MY_CHARSET_HANDLER my_charset_filename_handler= my_charlen_filename, my_well_formed_char_length_filename, my_copy_fix_mb, + my_wc_mb_filename, }; @@ -7879,6 +7881,7 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler= my_charlen_utf8mb4, my_well_formed_char_length_utf8mb4, my_copy_fix_mb, + my_wc_mb_utf8mb4, }; |