summaryrefslogtreecommitdiff
path: root/include/m_ctype.h
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.com>2023-02-17 16:20:01 +0400
committerAlexander Barkov <bar@mariadb.com>2023-02-17 17:33:27 +0400
commit7f6b648d7db895c09a6ace3b5ee62a5fb482a8e0 (patch)
tree66e727d8e65166497e68447ba659650d0cd53768 /include/m_ctype.h
parent345356b868d840554a8572876efc027d3ccd9842 (diff)
downloadmariadb-git-7f6b648d7db895c09a6ace3b5ee62a5fb482a8e0.tar.gz
MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8
String length growth during upper/lower conversion in Unicode collations depends only on the underlying MY_UNICASE_INFO used in the collation. Maintaining the separate members CHARSET_INFO::caseup_multiply and CHARSET_INFO::casedn_multiply duplicated this information and caused bugs like this one (when MY_UNICASE_INFO and case??_multiply went out of sync because of incomplete CHARSET_INFO initialization). Fix: change CHARSET_INFO::caseup_multiply and CHARSET_INFO::casedn_multiply from data members to virtual functions. The virtual functions in Unicode collations calculate the case conversion growth factors from the MY_UNICASE_INFO. This guarantees that the growth factors are always in sync with the MY_UNICASE_INFO.
Diffstat (limited to 'include/m_ctype.h')
-rw-r--r--include/m_ctype.h15
1 files changed, 13 insertions, 2 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 0b1cd2e0fb3..f980fd2053c 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -720,6 +720,9 @@ struct my_charset_handler_st
*/
my_charset_conv_wc_mb native_to_mb;
my_charset_conv_wc_mb wc_to_printable;
+
+ uint (*caseup_multiply)(CHARSET_INFO *cs);
+ uint (*casedn_multiply)(CHARSET_INFO *cs);
};
extern MY_CHARSET_HANDLER my_charset_8bit_handler;
@@ -756,8 +759,6 @@ struct charset_info_st
const uchar *state_map;
const uchar *ident_map;
uint strxfrm_multiply;
- uchar caseup_multiply;
- uchar casedn_multiply;
uint mbminlen;
uint mbmaxlen;
/*
@@ -827,6 +828,16 @@ struct charset_info_st
return (cset->casedn)(this, src, srclen, dst, dstlen);
}
+ uint caseup_multiply() const
+ {
+ return (cset->caseup_multiply)(this);
+ }
+
+ uint casedn_multiply() const
+ {
+ return (cset->casedn_multiply)(this);
+ }
+
size_t long10_to_str(char *dst, size_t dstlen,
int radix, long int val) const
{