summaryrefslogtreecommitdiff
path: root/include/m_ctype.h
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mysql.com>2010-11-26 13:44:39 +0300
committerAlexander Barkov <bar@mysql.com>2010-11-26 13:44:39 +0300
commite3dee8a7fd3e16147145b877917d4aa85346dfcb (patch)
tree9eecf2ef41882c89b5d01222dd068af85b4121b2 /include/m_ctype.h
parentce441751ed12a80aed10b8e5d718dac34d4c68b7 (diff)
downloadmariadb-git-e3dee8a7fd3e16147145b877917d4aa85346dfcb.tar.gz
Bug#57737 Character sets: search fails with like, contraction, index
Problem: LIKE over an indexed column optimized away good results, because my_like_range_utf32/utf16 returned wrong ranges for contractions. Contraction-related code was missing in my_like_range_utf32/utf16, but did exist in my_like_range_ucs2/utf8. It was forgotten in the utf32/utf16 versions (during the mysql-6.0 push/revert mess). Fix: The patch removes the individual functions my_like_range_ucs2, my_like_range_utf16, my_like_range_utf32 and introduces a single function my_like_range_generic() instead. The new function handles contractions correctly. It can handle any character set with cs->min_sort_char and cs->max_sort_char represented in Unicode code points. added: @ mysql-test/include/ctype_czech.inc @ mysql-test/include/ctype_like_ignorable.inc @ mysql-test/r/ctype_like_range.result @ mysql-test/t/ctype_like_range.test Adding tests modified: @ include/m_ctype.h - Adding helper functions for contractions. - Prototypes: removing ucs2,utf16,utf32 functions, adding generic function. @ mysql-test/r/ctype_uca.result @ mysql-test/r/ctype_utf16_uca.result @ mysql-test/r/ctype_utf32_uca.result @ mysql-test/t/ctype_uca.test @ mysql-test/t/ctype_utf16_uca.test @ mysql-test/t/ctype_utf32_uca.test - Adding tests. @ strings/ctype-mb.c - Pad function did not put the last character. - Implementing my_like_range_generic() - a universal replacement for three separate functions my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32(), with correct contraction handling. @ strings/ctype-ucs2.c - my_fill_mb2 did not put the high byte, as previously it was used to put only characters in ASCII range. Now it puts high byte as well (needed to populate cs->max_sort_char correctly). - Adding DBUG_ASSERT() - Removing character set specific functions: my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32(). - Using my_like_range_generic() instead of the old functions. @ strings/ctype-uca.c - Using generic function instead of the old character set specific ones. 
@ sql/item_create.cc @ sql/item_strfunc.cc @ sql/item_strfunc.h - Adding SQL functions LIKE_RANGE_MIN and LIKE_RANGE_MAX, available only in debug build to make sure like_range() works correctly for all character sets and collations.
Diffstat (limited to 'include/m_ctype.h')
-rw-r--r--include/m_ctype.h55
1 files changed, 35 insertions, 20 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 42e8f88cc0e..c054de8d7fd 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -356,6 +356,32 @@ extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
#define MY_UTF8MB4 "utf8mb4"
+/* Helper functions to handle contraction */
+/*
+  Report whether the collation carries a contraction table.
+  Returns non-zero when cs->contractions is set, zero when it is NULL.
+*/
+static inline my_bool
+my_cs_have_contractions(CHARSET_INFO *cs)
+{
+  return (cs->contractions == NULL) ? 0 : 1;
+}
+
+/*
+  Fetch the flag byte for wc from the byte table located 0x40*0x40 bytes
+  past the start of cs->contractions. Only the low 8 bits of wc select
+  the entry. Going by the function name, a non-zero result means wc may
+  start a contraction.
+  cs->contractions is dereferenced without a NULL check.
+*/
+static inline my_bool
+my_cs_can_be_contraction_head(CHARSET_INFO *cs, my_wc_t wc)
+{
+  const char *flags= (const char *) cs->contractions + 0x40*0x40;
+  return flags[wc & 0xFF];
+}
+
+/*
+  Fetch the flag byte for wc from the byte table located 0x40*0x40 bytes
+  past the start of cs->contractions. Only the low 8 bits of wc select
+  the entry. Going by the function name, a non-zero result means wc may
+  end a contraction.
+  NOTE: this reads exactly the same entry as
+  my_cs_can_be_contraction_head(), so both helpers return the same value
+  for a given wc.
+  cs->contractions is dereferenced without a NULL check.
+*/
+static inline my_bool
+my_cs_can_be_contraction_tail(CHARSET_INFO *cs, my_wc_t wc)
+{
+  const char *flags= (const char *) cs->contractions + 0x40*0x40;
+  return flags[wc & 0xFF];
+}
+
+/*
+  Return the address of the cs->contractions element at index
+  (wc1 - 0x40) * 0x40 + wc2 - 0x40, presumably the weight slot for the
+  two-character contraction wc1+wc2.
+  Neither wc1 nor wc2 is range-checked here, and cs->contractions is
+  used without a NULL check.
+*/
+static inline uint16*
+my_cs_contraction2_weight(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
+{
+  return cs->contractions + ((wc1 - 0x40) * 0x40 + wc2 - 0x40);
+}
+
+
/* declarations for simple charsets */
extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t,
const uchar *, size_t);
@@ -430,6 +456,7 @@ ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs,
void my_fill_8bit(CHARSET_INFO *cs, char* to, size_t l, int fill);
+/* For 8-bit character set */
my_bool my_like_range_simple(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
@@ -437,6 +464,7 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
+/* For ASCII-based multi-byte character sets with mbminlen=1 */
my_bool my_like_range_mb(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
@@ -444,26 +472,13 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
-my_bool my_like_range_ucs2(CHARSET_INFO *cs,
- const char *ptr, size_t ptr_length,
- pbool escape, pbool w_one, pbool w_many,
- size_t res_length,
- char *min_str, char *max_str,
- size_t *min_length, size_t *max_length);
-
-my_bool my_like_range_utf16(CHARSET_INFO *cs,
- const char *ptr, size_t ptr_length,
- pbool escape, pbool w_one, pbool w_many,
- size_t res_length,
- char *min_str, char *max_str,
- size_t *min_length, size_t *max_length);
-
-my_bool my_like_range_utf32(CHARSET_INFO *cs,
- const char *ptr, size_t ptr_length,
- pbool escape, pbool w_one, pbool w_many,
- size_t res_length,
- char *min_str, char *max_str,
- size_t *min_length, size_t *max_length);
+/*
+  For other character sets, with arbitrary mbminlen and mbmaxlen numbers.
+  Takes the same parameter list as my_like_range_ucs2(),
+  my_like_range_utf16() and my_like_range_utf32(), which it replaces.
+  Per the commit description, it handles contractions and works for any
+  character set whose cs->min_sort_char and cs->max_sort_char are
+  represented in Unicode code points.
+*/
+my_bool my_like_range_generic(CHARSET_INFO *cs,
+ const char *ptr, size_t ptr_length,
+ pbool escape, pbool w_one, pbool w_many,
+ size_t res_length,
+ char *min_str, char *max_str,
+ size_t *min_length, size_t *max_length);
int my_wildcmp_8bit(CHARSET_INFO *,
const char *str,const char *str_end,