diff options
author | Alexander Barkov <bar@mysql.com> | 2010-11-26 13:44:39 +0300 |
---|---|---|
committer | Alexander Barkov <bar@mysql.com> | 2010-11-26 13:44:39 +0300 |
commit | e3dee8a7fd3e16147145b877917d4aa85346dfcb (patch) | |
tree | 9eecf2ef41882c89b5d01222dd068af85b4121b2 /include | |
parent | ce441751ed12a80aed10b8e5d718dac34d4c68b7 (diff) | |
download | mariadb-git-e3dee8a7fd3e16147145b877917d4aa85346dfcb.tar.gz |
Bug#57737 Character sets: search fails with like, contraction, index
Problem: LIKE over an indexed column optimized away good results,
because my_like_range_utf32/utf16 returned wrong ranges for contractions.
Contraction related code was missing in my_like_range_utf32/utf16,
but did exist in my_like_range_ucs2/utf8.
It was forgotten in utf32/utf16 versions (during mysql-6.0 push/revert mess).
Fix:
The patch removes individual functions my_like_range_ucs2,
my_like_range_utf16, my_like_range_utf32 and introduces a single function
my_like_range_generic() instead. The new function handles contractions
correctly. It can handle any character set with cs->min_sort_char and
cs->max_sort_char represented in Unicode code points.
added:
@ mysql-test/include/ctype_czech.inc
@ mysql-test/include/ctype_like_ignorable.inc
@ mysql-test/r/ctype_like_range.result
@ mysql-test/t/ctype_like_range.test
Adding tests
modified:
@ include/m_ctype.h
- Adding helper functions for contractions.
- Prototypes: removing ucs2,utf16,utf32 functions, adding generic function.
@ mysql-test/r/ctype_uca.result
@ mysql-test/r/ctype_utf16_uca.result
@ mysql-test/r/ctype_utf32_uca.result
@ mysql-test/t/ctype_uca.test
@ mysql-test/t/ctype_utf16_uca.test
@ mysql-test/t/ctype_utf32_uca.test
- Adding tests.
@ strings/ctype-mb.c
- Pad function did not put the last character.
- Implementing my_like_range_generic() - an universal replacement
for three separate functions
my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32(),
with correct contraction handling.
@ strings/ctype-ucs2.c
- my_fill_mb2 did not put the high byte, as previously
it was used to put only characters in ASCII range.
Now it puts high byte as well
(needed to pupulate cs->max_sort_char correctly).
- Adding DBUG_ASSERT()
- Removing character set specific functions:
my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32().
- Using my_like_range_generic() instead of the old functions.
@ strings/ctype-uca.c
- Using generic function instead of the old character set specific ones.
@ sql/item_create.cc
@ sql/item_strfunc.cc
@ sql/item_strfunc.h
- Adding SQL functions LIKE_RANGE_MIN and LIKE_RANGE_MAX,
available only in debug build to make sure like_range()
works correctly for all character sets and collations.
Diffstat (limited to 'include')
-rw-r--r-- | include/m_ctype.h | 55 |
1 files changed, 35 insertions, 20 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index 42e8f88cc0e..c054de8d7fd 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -356,6 +356,32 @@ extern CHARSET_INFO my_charset_utf8mb4_unicode_ci; #define MY_UTF8MB4 "utf8mb4" +/* Helper functions to handle contraction */ +static inline my_bool +my_cs_have_contractions(CHARSET_INFO *cs) +{ + return cs->contractions != NULL; +} + +static inline my_bool +my_cs_can_be_contraction_head(CHARSET_INFO *cs, my_wc_t wc) +{ + return ((const char *)cs->contractions)[0x40*0x40 + (wc & 0xFF)]; +} + +static inline my_bool +my_cs_can_be_contraction_tail(CHARSET_INFO *cs, my_wc_t wc) +{ + return ((const char *)cs->contractions)[0x40*0x40 + (wc & 0xFF)]; +} + +static inline uint16* +my_cs_contraction2_weight(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) +{ + return &cs->contractions[(wc1 - 0x40) * 0x40 + wc2 - 0x40]; +} + + /* declarations for simple charsets */ extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t, const uchar *, size_t); @@ -430,6 +456,7 @@ ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs, void my_fill_8bit(CHARSET_INFO *cs, char* to, size_t l, int fill); +/* For 8-bit character set */ my_bool my_like_range_simple(CHARSET_INFO *cs, const char *ptr, size_t ptr_length, pbool escape, pbool w_one, pbool w_many, @@ -437,6 +464,7 @@ my_bool my_like_range_simple(CHARSET_INFO *cs, char *min_str, char *max_str, size_t *min_length, size_t *max_length); +/* For ASCII-based multi-byte character sets with mbminlen=1 */ my_bool my_like_range_mb(CHARSET_INFO *cs, const char *ptr, size_t ptr_length, pbool escape, pbool w_one, pbool w_many, @@ -444,26 +472,13 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, char *min_str, char *max_str, size_t *min_length, size_t *max_length); -my_bool my_like_range_ucs2(CHARSET_INFO *cs, - const char *ptr, size_t ptr_length, - pbool escape, pbool w_one, pbool w_many, - size_t res_length, - char *min_str, char *max_str, - size_t *min_length, size_t *max_length); - -my_bool my_like_range_utf16(CHARSET_INFO *cs, - const char *ptr, size_t ptr_length, - pbool escape, pbool w_one, pbool w_many, - size_t res_length, - char *min_str, char *max_str, - size_t *min_length, size_t *max_length); - -my_bool my_like_range_utf32(CHARSET_INFO *cs, - const char *ptr, size_t ptr_length, - pbool escape, pbool w_one, pbool w_many, - size_t res_length, - char *min_str, char *max_str, - size_t *min_length, size_t *max_length); +/* For other character sets, with arbitrary mbminlen and mbmaxlen numbers */ +my_bool my_like_range_generic(CHARSET_INFO *cs, + const char *ptr, size_t ptr_length, + pbool escape, pbool w_one, pbool w_many, + size_t res_length, + char *min_str, char *max_str, + size_t *min_length, size_t *max_length); int my_wildcmp_8bit(CHARSET_INFO *, const char *str,const char *str_end, |