diff options
author | Alexander Barkov <bar@mariadb.com> | 2023-04-18 06:44:03 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2023-04-18 06:44:03 +0400 |
commit | 30b4bb4204cb7d259eaff595c46e69d16b55adc7 (patch) | |
tree | ef44550550e96f564fe554a11cfbaa1a3e1d53bd /strings | |
parent | 2230c2e7aa47326da22bfee72da8022f8247e92a (diff) | |
download | mariadb-git-30b4bb4204cb7d259eaff595c46e69d16b55adc7.tar.gz |
MDEV-31068 Reuse duplicate case conversion code in ctype-utf8.c and ctype-ucs2.c
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-ucs2.c | 66 |
-rw-r--r-- | strings/ctype-unidata.h | 56 |
-rw-r--r-- | strings/ctype-utf8.c | 74 |
3 files changed, 78 insertions, 118 deletions
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 4b29d656731..6b52ade4431 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1285,24 +1285,6 @@ const char charset_name_utf16le[]= "utf16le"; #define charset_name_utf16le_length (sizeof(charset_name_utf16le)-1) static inline void -my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) -{ - MY_UNICASE_CHARACTER *page; - if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8])) - *wc= page[*wc & 0xFF].tolower; -} - - -static inline void -my_toupper_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) -{ - MY_UNICASE_CHARACTER *page; - if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8])) - *wc= page[*wc & 0xFF].toupper; -} - - -static inline void my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) { if (*wc <= uni_plane->maxchar) @@ -1335,7 +1317,7 @@ my_caseup_utf16(CHARSET_INFO *cs, const char *src, size_t srclen, while ((src < srcend) && (res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0) { - my_toupper_utf16(uni_plane, &wc); + my_toupper_unicode(uni_plane, &wc); if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend)) break; src+= res; @@ -1393,7 +1375,7 @@ my_casedn_utf16(CHARSET_INFO *cs, const char *src, size_t srclen, while ((src < srcend) && (res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0) { - my_tolower_utf16(uni_plane, &wc); + my_tolower_unicode(uni_plane, &wc); if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend)) break; src+= res; @@ -2197,24 +2179,6 @@ my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)), static inline void -my_tolower_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) -{ - MY_UNICASE_CHARACTER *page; - if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8])) - *wc= page[*wc & 0xFF].tolower; -} - - -static inline void -my_toupper_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) -{ - MY_UNICASE_CHARACTER *page; - if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8])) - *wc= page[*wc & 
0xFF].toupper; -} - - -static inline void my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) { if (*wc <= uni_plane->maxchar) @@ -2256,7 +2220,7 @@ my_caseup_utf32(CHARSET_INFO *cs, const char *src, size_t srclen, while ((src < srcend) && (res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0) { - my_toupper_utf32(uni_plane, &wc); + my_toupper_unicode(uni_plane, &wc); if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend)) break; src+= res; @@ -2312,7 +2276,7 @@ my_casedn_utf32(CHARSET_INFO *cs, const char *src, size_t srclen, while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0) { - my_tolower_utf32(uni_plane,&wc); + my_tolower_unicode(uni_plane,&wc); if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend)) break; src+= res; @@ -3119,24 +3083,6 @@ static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) , static inline void -my_tolower_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) -{ - MY_UNICASE_CHARACTER *page; - if ((page= uni_plane->page[(*wc >> 8) & 0xFF])) - *wc= page[*wc & 0xFF].tolower; -} - - -static inline void -my_toupper_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) -{ - MY_UNICASE_CHARACTER *page; - if ((page= uni_plane->page[(*wc >> 8) & 0xFF])) - *wc= page[*wc & 0xFF].toupper; -} - - -static inline void my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) { MY_UNICASE_CHARACTER *page; @@ -3157,7 +3103,7 @@ static size_t my_caseup_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen, while ((src < srcend) && (res= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0) { - my_toupper_ucs2(uni_plane, &wc); + my_toupper_unicode_bmp(uni_plane, &wc); if (res != my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dstend)) break; src+= res; @@ -3208,7 +3154,7 @@ static size_t my_casedn_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen, while ((src < srcend) && (res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0) { - my_tolower_ucs2(uni_plane, &wc); + my_tolower_unicode_bmp(uni_plane, &wc); 
if (res != my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dstend)) break; src+= res; diff --git a/strings/ctype-unidata.h b/strings/ctype-unidata.h index 9900fd0cedd..df591b6cf83 100644 --- a/strings/ctype-unidata.h +++ b/strings/ctype-unidata.h @@ -24,6 +24,62 @@ extern MY_UNICASE_CHARACTER *my_unicase_default_pages[256]; extern MY_UNICASE_CHARACTER my_unicase_mysql500_page00[256]; extern MY_UNICASE_CHARACTER *my_unicase_mysql500_pages[256]; + +static inline my_wc_t my_u300_tolower_7bit(uchar ch) +{ + return my_unicase_default_page00[ch].tolower; +} + +static inline my_wc_t my_u300_toupper_7bit(uchar ch) +{ + return my_unicase_default_page00[ch].toupper; +} + + +static inline void +my_tolower_unicode_bmp(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) +{ + const MY_UNICASE_CHARACTER *page; + DBUG_ASSERT(*wc <= uni_plane->maxchar); + if ((page= uni_plane->page[*wc >> 8])) + *wc= page[*wc & 0xFF].tolower; +} + + +static inline void +my_toupper_unicode_bmp(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) +{ + const MY_UNICASE_CHARACTER *page; + DBUG_ASSERT(*wc <= uni_plane->maxchar); + if ((page= uni_plane->page[*wc >> 8])) + *wc= page[*wc & 0xFF].toupper; +} + + +static inline void +my_tolower_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) +{ + if (*wc <= uni_plane->maxchar) + { + const MY_UNICASE_CHARACTER *page; + if ((page= uni_plane->page[(*wc >> 8)])) + *wc= page[*wc & 0xFF].tolower; + } +} + + +static inline void +my_toupper_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) +{ + if (*wc <= uni_plane->maxchar) + { + const MY_UNICASE_CHARACTER *page; + if ((page= uni_plane->page[(*wc >> 8)])) + *wc= page[*wc & 0xFF].toupper; + } +} + + size_t my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights); size_t my_strxfrm_pad_unicode(uchar *str, uchar *strend); diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 06a10034a33..75c12e6354b 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -5207,24 +5207,6 @@ static int 
my_uni_utf8mb3_no_range(CHARSET_INFO *cs __attribute__((unused)), } -static inline void -my_tolower_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) -{ - MY_UNICASE_CHARACTER *page; - if ((page= uni_plane->page[(*wc >> 8) & 0xFF])) - *wc= page[*wc & 0xFF].tolower; -} - - -static inline void -my_toupper_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) -{ - MY_UNICASE_CHARACTER *page; - if ((page= uni_plane->page[(*wc >> 8) & 0xFF])) - *wc= page[*wc & 0xFF].toupper; -} - - static size_t my_caseup_utf8mb3(CHARSET_INFO *cs, const char *src, size_t srclen, char *dst, size_t dstlen) @@ -5239,7 +5221,7 @@ static size_t my_caseup_utf8mb3(CHARSET_INFO *cs, while ((src < srcend) && (srcres= my_utf8mb3_uni(cs, &wc, (uchar *) src, (uchar*) srcend)) > 0) { - my_toupper_utf8mb3(uni_plane, &wc); + my_toupper_unicode_bmp(uni_plane, &wc); if ((dstres= my_uni_utf8mb3(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) break; src+= srcres; @@ -5292,7 +5274,7 @@ static size_t my_caseup_str_utf8mb3(CHARSET_INFO *cs, char *src) while (*src && (srcres= my_utf8mb3_uni_no_range(cs, &wc, (uchar *) src)) > 0) { - my_toupper_utf8mb3(uni_plane, &wc); + my_toupper_unicode_bmp(uni_plane, &wc); if ((dstres= my_uni_utf8mb3_no_range(cs, wc, (uchar*) dst)) <= 0) break; src+= srcres; @@ -5317,7 +5299,7 @@ static size_t my_casedn_utf8mb3(CHARSET_INFO *cs, while ((src < srcend) && (srcres= my_utf8mb3_uni(cs, &wc, (uchar*) src, (uchar*)srcend)) > 0) { - my_tolower_utf8mb3(uni_plane, &wc); + my_tolower_unicode_bmp(uni_plane, &wc); if ((dstres= my_uni_utf8mb3(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) break; src+= srcres; @@ -5338,7 +5320,7 @@ static size_t my_casedn_str_utf8mb3(CHARSET_INFO *cs, char *src) while (*src && (srcres= my_utf8mb3_uni_no_range(cs, &wc, (uchar *) src)) > 0) { - my_tolower_utf8mb3(uni_plane, &wc); + my_tolower_unicode_bmp(uni_plane, &wc); if ((dstres= my_uni_utf8mb3_no_range(cs, wc, (uchar*) dst)) <= 0) break; src+= srcres; @@ -5397,7 +5379,7 @@ int 
my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t) It represents a single byte character. Convert it into weight according to collation. */ - s_wc= my_unicase_default_page00[(uchar) s[0]].tolower; + s_wc= my_u300_tolower_7bit((uchar) s[0]); s++; } else @@ -5430,7 +5412,7 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t) s+= res; /* Convert Unicode code into weight according to collation */ - my_tolower_utf8mb3(uni_plane, &s_wc); + my_tolower_unicode_bmp(uni_plane, &s_wc); } @@ -5439,7 +5421,7 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t) if ((uchar) t[0] < 128) { /* Convert single byte character into weight */ - t_wc= my_unicase_default_page00[(uchar) t[0]].tolower; + t_wc= my_u300_tolower_7bit((uchar) t[0]); t++; } else @@ -5450,7 +5432,7 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t) t+= res; /* Convert code into weight */ - my_tolower_utf8mb3(uni_plane, &t_wc); + my_tolower_unicode_bmp(uni_plane, &t_wc); } /* Now we have two weights, let's compare them */ @@ -7678,30 +7660,6 @@ my_wc_mb_utf8mb4_no_range(CHARSET_INFO *cs __attribute__((unused)), } -static inline void -my_tolower_utf8mb4(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) -{ - if (*wc <= uni_plane->maxchar) - { - MY_UNICASE_CHARACTER *page; - if ((page= uni_plane->page[(*wc >> 8)])) - *wc= page[*wc & 0xFF].tolower; - } -} - - -static inline void -my_toupper_utf8mb4(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) -{ - if (*wc <= uni_plane->maxchar) - { - MY_UNICASE_CHARACTER *page; - if ((page= uni_plane->page[(*wc >> 8)])) - *wc= page[*wc & 0xFF].toupper; - } -} - - static size_t my_caseup_utf8mb4(CHARSET_INFO *cs, const char *src, size_t srclen, char *dst, size_t dstlen) @@ -7717,7 +7675,7 @@ my_caseup_utf8mb4(CHARSET_INFO *cs, const char *src, size_t srclen, (srcres= my_mb_wc_utf8mb4(cs, &wc, (uchar *) src, (uchar*) srcend)) > 0) { - my_toupper_utf8mb4(uni_plane, &wc); + my_toupper_unicode(uni_plane, &wc); 
if ((dstres= my_wc_mb_utf8mb4(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) break; src+= srcres; @@ -7784,7 +7742,7 @@ my_caseup_str_utf8mb4(CHARSET_INFO *cs, char *src) while (*src && (srcres= my_mb_wc_utf8mb4_no_range(cs, &wc, (uchar *) src)) > 0) { - my_toupper_utf8mb4(uni_plane, &wc); + my_toupper_unicode(uni_plane, &wc); if ((dstres= my_wc_mb_utf8mb4_no_range(cs, wc, (uchar*) dst)) <= 0) break; src+= srcres; @@ -7811,7 +7769,7 @@ my_casedn_utf8mb4(CHARSET_INFO *cs, (srcres= my_mb_wc_utf8mb4(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0) { - my_tolower_utf8mb4(uni_plane, &wc); + my_tolower_unicode(uni_plane, &wc); if ((dstres= my_wc_mb_utf8mb4(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0) break; src+= srcres; @@ -7833,7 +7791,7 @@ my_casedn_str_utf8mb4(CHARSET_INFO *cs, char *src) while (*src && (srcres= my_mb_wc_utf8mb4_no_range(cs, &wc, (uchar *) src)) > 0) { - my_tolower_utf8mb4(uni_plane, &wc); + my_tolower_unicode(uni_plane, &wc); if ((dstres= my_wc_mb_utf8mb4_no_range(cs, wc, (uchar*) dst)) <= 0) break; src+= srcres; @@ -7888,7 +7846,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t) It represents a single byte character. Convert it into weight according to collation. 
*/ - s_wc= my_unicase_default_page00[(uchar) s[0]].tolower; + s_wc= my_u300_tolower_7bit((uchar) s[0]); s++; } else @@ -7903,7 +7861,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t) return strcmp(s, t); s+= res; - my_tolower_utf8mb4(uni_plane, &s_wc); + my_tolower_unicode(uni_plane, &s_wc); } @@ -7912,7 +7870,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t) if ((uchar) t[0] < 128) { /* Convert single byte character into weight */ - t_wc= my_unicase_default_page00[(uchar) t[0]].tolower; + t_wc= my_u300_tolower_7bit((uchar) t[0]); t++; } else @@ -7922,7 +7880,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t) return strcmp(s, t); t+= res; - my_tolower_utf8mb4(uni_plane, &t_wc); + my_tolower_unicode(uni_plane, &t_wc); } /* Now we have two weights, let's compare them */ |