diff options
author | Alexander Barkov <bar@mariadb.com> | 2023-02-17 16:20:01 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2023-02-17 17:33:27 +0400 |
commit | 7f6b648d7db895c09a6ace3b5ee62a5fb482a8e0 (patch) | |
tree | 66e727d8e65166497e68447ba659650d0cd53768 /strings/ctype-ujis.c | |
parent | 345356b868d840554a8572876efc027d3ccd9842 (diff) | |
download | mariadb-git-7f6b648d7db895c09a6ace3b5ee62a5fb482a8e0.tar.gz |
MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8
String length growth during upper/lower conversion
in Unicode collations depends only on the underlying MY_UNICASE_INFO
used in the collation.
Maintaining the separate members CHARSET_INFO::caseup_multiply and
CHARSET_INFO::casedn_multiply duplicated this information
and caused bugs like this one (when MY_UNICASE_INFO and case??_multiply
went out of sync because of incomplete CHARSET_INFO initialization).
Fix:
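Change CHARSET_INFO::caseup_multiply and CHARSET_INFO::casedn_multiply
from data members to virtual functions (function pointers in
MY_CHARSET_HANDLER, called as cs->cset->caseup_multiply(cs) and
cs->cset->casedn_multiply(cs)).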
The virtual functions in Unicode collations calculate case conversion
growth factors from the MY_UNICASE_INFO. This guarantees that the growth
factors are always in sync with the MY_UNICASE_INFO.
Diffstat (limited to 'strings/ctype-ujis.c')
-rw-r--r-- | strings/ctype-ujis.c | 20 |
1 files changed, 7 insertions, 13 deletions
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index ef704e5c15c..143b2c0ae9c 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -67218,8 +67218,8 @@ size_t my_casedn_ujis(CHARSET_INFO * cs, const char *src, size_t srclen, char *dst, size_t dstlen) { - DBUG_ASSERT(dstlen >= srclen * cs->casedn_multiply); - DBUG_ASSERT(src != dst || cs->casedn_multiply == 1); + DBUG_ASSERT(dstlen >= srclen * cs->cset->casedn_multiply(cs)); + DBUG_ASSERT(src != dst || cs->cset->casedn_multiply(cs) == 1); return my_casefold_ujis(cs, src, srclen, dst, dstlen, cs->to_lower, 0); } @@ -67231,8 +67231,8 @@ size_t my_caseup_ujis(CHARSET_INFO * cs, const char *src, size_t srclen, char *dst, size_t dstlen) { - DBUG_ASSERT(dstlen >= srclen * cs->caseup_multiply); - DBUG_ASSERT(src != dst || cs->caseup_multiply == 1); + DBUG_ASSERT(dstlen >= srclen * cs->cset->caseup_multiply(cs)); + DBUG_ASSERT(src != dst || cs->cset->caseup_multiply(cs) == 1); return my_casefold_ujis(cs, src, srclen, dst, dstlen, cs->to_upper, 1); } #endif /* defined(HAVE_CHARSET_ujis) || defined(HAVE_CHARSET_eucjpms) */ @@ -67355,7 +67355,9 @@ static MY_CHARSET_HANDLER my_charset_handler= my_well_formed_char_length_ujis, my_copy_fix_mb, my_native_to_mb_ujis, - my_wc_to_printable_generic + my_wc_to_printable_generic, + my_casefold_multiply_1, + my_casefold_multiply_2 }; @@ -67379,8 +67381,6 @@ struct charset_info_st my_charset_ujis_japanese_ci= NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ - 1, /* caseup_multiply */ - 2, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 0, /* min_sort_char */ @@ -67412,8 +67412,6 @@ struct charset_info_st my_charset_ujis_bin= NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ - 1, /* caseup_multiply */ - 2, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 0, /* min_sort_char */ @@ -67445,8 +67443,6 @@ struct charset_info_st my_charset_ujis_japanese_nopad_ci= NULL, /* state_map */ NULL, /* ident_map */ 1, /* 
strxfrm_multiply */ - 1, /* caseup_multiply */ - 2, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 0, /* min_sort_char */ @@ -67478,8 +67474,6 @@ struct charset_info_st my_charset_ujis_nopad_bin= NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ - 1, /* caseup_multiply */ - 2, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 0, /* min_sort_char */ |