From e497d6e6e1a45ffdd235e965c54ba8354bb18b83 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Mon, 26 Jul 2010 09:06:18 +0400 Subject: Bug#45012 my_like_range_cp932 generates invalid string Problem: The functions my_like_range_xxx() returned badly formed maximum strings for Asian character sets, which caused problems for storage engines. Fix: - Removed a number of my_like_range_xxx() implementations, which were in fact duplicate code pieces. - Using generic my_like_range_mb() instead. - Setting max_sort_char member properly for Asian character sets. - Adding unittest/strings/strings-t.c, to test that my_like_range_xxx() return well-formed min and max strings. Notes: - No additional tests in mysql/t/ available. Old tests cover the affected code well enough. --- strings/ctype-big5.c | 86 ++----------------------------------------------- strings/ctype-cp932.c | 76 ++----------------------------------------- strings/ctype-euc_kr.c | 4 +-- strings/ctype-eucjpms.c | 4 +-- strings/ctype-gb2312.c | 4 +-- strings/ctype-gbk.c | 86 ++----------------------------------------------- strings/ctype-mb.c | 22 +++++++++---- strings/ctype-sjis.c | 85 ++---------------------------------------------- strings/ctype-ujis.c | 4 +-- 9 files changed, 36 insertions(+), 335 deletions(-) (limited to 'strings') diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index 3da307b82fc..2cb40c266d2 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -377,86 +377,6 @@ static int my_strxfrm_big5(uchar *dest, const uchar *src, int len) #endif -/* -** Calculate min_str and max_str that ranges a LIKE string. -** Arguments: -** ptr Pointer to LIKE string. -** ptr_length Length of LIKE string. -** escape Escape character in LIKE. (Normally '\'). -** All escape characters should be removed from min_str and max_str -** res_length Length of min_str and max_str. -** min_str Smallest case sensitive string that ranges LIKE. -** Should be space padded to res_length. 
-** max_str Largest case sensitive string that ranges LIKE. -** Normally padded with the biggest character sort value. -** -** The function should return 0 if ok and 1 if the LIKE string can't be -** optimized ! -*/ - -#define max_sort_char ((char) 255) - -static my_bool my_like_range_big5(CHARSET_INFO *cs __attribute__((unused)), - const char *ptr,size_t ptr_length, - pbool escape, pbool w_one, pbool w_many, - size_t res_length, - char *min_str, char *max_str, - size_t *min_length, size_t *max_length) -{ - const char *end= ptr + ptr_length; - char *min_org=min_str; - char *min_end=min_str+res_length; - size_t charlen= res_length / cs->mbmaxlen; - - for (; ptr != end && min_str != min_end && charlen > 0; ptr++, charlen--) - { - if (ptr+1 != end && isbig5code(ptr[0],ptr[1])) - { - *min_str++= *max_str++ = *ptr++; - *min_str++= *max_str++ = *ptr; - continue; - } - if (*ptr == escape && ptr+1 != end) - { - ptr++; /* Skip escape */ - if (isbig5code(ptr[0], ptr[1])) - *min_str++= *max_str++ = *ptr++; - if (min_str < min_end) - *min_str++= *max_str++= *ptr; - continue; - } - if (*ptr == w_one) /* '_' in SQL */ - { - *min_str++='\0'; /* This should be min char */ - *max_str++=max_sort_char; - continue; - } - if (*ptr == w_many) /* '%' in SQL */ - { - /* - Calculate length of keys: - 'a\0\0... is the smallest possible string when we have space expand - a\ff\ff... is the biggest possible string - */ - *min_length= ((cs->state & MY_CS_BINSORT) ? 
(size_t) (min_str - min_org) : - res_length); - *max_length= res_length; - do { - *min_str++ = 0; - *max_str++ = max_sort_char; - } while (min_str != min_end); - return 0; - } - *min_str++= *max_str++ = *ptr; - } - - *min_length= *max_length= (size_t) (min_str-min_org); - while (min_str != min_end) - *min_str++= *max_str++= ' '; - return 0; -} - - static uint ismbchar_big5(CHARSET_INFO *cs __attribute__((unused)), const char* p, const char *e) { @@ -6338,7 +6258,7 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler = my_strnncollsp_big5, my_strnxfrm_big5, my_strnxfrmlen_simple, - my_like_range_big5, + my_like_range_mb, my_wildcmp_mb, my_strcasecmp_mb, my_instr_mb, @@ -6402,7 +6322,7 @@ CHARSET_INFO my_charset_big5_chinese_ci= 1, /* mbminlen */ 2, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xF9D5, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_big5_handler, @@ -6435,7 +6355,7 @@ CHARSET_INFO my_charset_big5_bin= 1, /* mbminlen */ 2, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xF9FE, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_big5_handler, diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c index 07191c436b7..238c6f61baa 100644 --- a/strings/ctype-cp932.c +++ b/strings/ctype-cp932.c @@ -306,76 +306,6 @@ static size_t my_strnxfrm_cp932(CHARSET_INFO *cs __attribute__((unused)), } -/* -** Calculate min_str and max_str that ranges a LIKE string. -** Arguments: -** ptr Pointer to LIKE string. -** ptr_length Length of LIKE string. -** escape Escape character in LIKE. (Normally '\'). -** All escape characters should be removed from min_str and max_str -** res_length Length of min_str and max_str. -** min_str Smallest case sensitive string that ranges LIKE. -** Should be space padded to res_length. -** max_str Largest case sensitive string that ranges LIKE. 
-** Normally padded with the biggest character sort value. -** -** The function should return 0 if ok and 1 if the LIKE string can't be -** optimized ! -*/ - -#define max_sort_char ((char) 255) - -static my_bool my_like_range_cp932(CHARSET_INFO *cs __attribute__((unused)), - const char *ptr,size_t ptr_length, - pbool escape, pbool w_one, pbool w_many, - size_t res_length, - char *min_str,char *max_str, - size_t *min_length, size_t *max_length) -{ - const char *end=ptr+ptr_length; - char *min_org=min_str; - char *min_end=min_str+res_length; - - while (ptr < end && min_str < min_end) { - if (ismbchar_cp932(cs, ptr, end)) { - *min_str++ = *max_str++ = *ptr++; - if (min_str < min_end) - *min_str++ = *max_str++ = *ptr++; - continue; - } - if (*ptr == escape && ptr+1 < end) { - ptr++; /* Skip escape */ - if (ismbchar_cp932(cs, ptr, end)) - *min_str++ = *max_str++ = *ptr++; - if (min_str < min_end) - *min_str++ = *max_str++ = *ptr++; - continue; - } - if (*ptr == w_one) { /* '_' in SQL */ - *min_str++ = '\0'; /* This should be min char */ - *max_str++ = max_sort_char; - ptr++; - continue; - } - if (*ptr == w_many) - { /* '%' in SQL */ - *min_length = (size_t)(min_str - min_org); - *max_length = res_length; - do - { - *min_str++= 0; - *max_str++= max_sort_char; - } while (min_str < min_end); - return 0; - } - *min_str++ = *max_str++ = *ptr++; - } - *min_length = *max_length = (size_t) (min_str - min_org); - while (min_str < min_end) - *min_str++ = *max_str++ = ' '; /* Because if key compression */ - return 0; -} - /* page 0 0x00A1-0x00DF */ static uint16 tab_cp932_uni0[]={ 0xFF61,0xFF62,0xFF63,0xFF64,0xFF65,0xFF66,0xFF67,0xFF68, @@ -5467,7 +5397,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_strnncollsp_cp932, my_strnxfrm_cp932, my_strnxfrmlen_simple, - my_like_range_cp932, + my_like_range_mb, my_wildcmp_mb, /* wildcmp */ my_strcasecmp_8bit, my_instr_mb, @@ -5533,7 +5463,7 @@ CHARSET_INFO my_charset_cp932_japanese_ci= 1, /* mbminlen */ 2, /* mbmaxlen */ 0, 
/* min_sort_char */ - 255, /* max_sort_char */ + 0xFCFC, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_handler, @@ -5565,7 +5495,7 @@ CHARSET_INFO my_charset_cp932_bin= 1, /* mbminlen */ 2, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xFCFC, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_handler, diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index fc0af7e35d5..ee957304716 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8762,7 +8762,7 @@ CHARSET_INFO my_charset_euckr_korean_ci= 1, /* mbminlen */ 2, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xFEFE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ &my_charset_handler, @@ -8795,7 +8795,7 @@ CHARSET_INFO my_charset_euckr_bin= 1, /* mbminlen */ 2, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xFEFE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ &my_charset_handler, diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c index b8b04dfca6b..615981b4d27 100644 --- a/strings/ctype-eucjpms.c +++ b/strings/ctype-eucjpms.c @@ -8710,7 +8710,7 @@ CHARSET_INFO my_charset_eucjpms_japanese_ci= 1, /* mbminlen */ 3, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xFEFE, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ &my_charset_handler, @@ -8743,7 +8743,7 @@ CHARSET_INFO my_charset_eucjpms_bin= 1, /* mbminlen */ 3, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xFEFE, /* max_sort_char */ ' ', /* pad_char */ 0, /* escape_with_backslash_is_dangerous */ &my_charset_handler, diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index 0267f35ff5c..84f67dbbc2e 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5790,7 +5790,7 @@ CHARSET_INFO 
my_charset_gb2312_chinese_ci= 1, /* mbminlen */ 2, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xF7FE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ &my_charset_handler, @@ -5822,7 +5822,7 @@ CHARSET_INFO my_charset_gb2312_bin= 1, /* mbminlen */ 2, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xF7FE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ &my_charset_handler, diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 7b8bb85652b..89607823d34 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -2690,86 +2690,6 @@ static size_t my_strnxfrm_gbk(CHARSET_INFO *cs __attribute__((unused)), } -/* -** Calculate min_str and max_str that ranges a LIKE string. -** Arguments: -** ptr Pointer to LIKE string. -** ptr_length Length of LIKE string. -** escape Escape character in LIKE. (Normally '\'). -** All escape characters should be removed from min_str and max_str -** res_length Length of min_str and max_str. -** min_str Smallest case sensitive string that ranges LIKE. -** Should be space padded to res_length. -** max_str Largest case sensitive string that ranges LIKE. -** Normally padded with the biggest character sort value. -** -** The function should return 0 if ok and 1 if the LIKE string can't be -** optimized ! 
-*/ - -#define max_sort_char ((uchar) 255) - -static my_bool my_like_range_gbk(CHARSET_INFO *cs __attribute__((unused)), - const char *ptr,size_t ptr_length, - pbool escape, pbool w_one, pbool w_many, - size_t res_length, - char *min_str,char *max_str, - size_t *min_length,size_t *max_length) -{ - const char *end= ptr + ptr_length; - char *min_org=min_str; - char *min_end=min_str+res_length; - size_t charlen= res_length / cs->mbmaxlen; - - for (; ptr != end && min_str != min_end && charlen > 0; ptr++, charlen--) - { - if (ptr+1 != end && isgbkcode(ptr[0],ptr[1])) - { - *min_str++= *max_str++ = *ptr++; - *min_str++= *max_str++ = *ptr; - continue; - } - if (*ptr == escape && ptr+1 != end) - { - ptr++; /* Skip escape */ - if (isgbkcode(ptr[0], ptr[1])) - *min_str++= *max_str++ = *ptr; - if (min_str < min_end) - *min_str++= *max_str++= *ptr; - continue; - } - if (*ptr == w_one) /* '_' in SQL */ - { - *min_str++='\0'; /* This should be min char */ - *max_str++=max_sort_char; - continue; - } - if (*ptr == w_many) /* '%' in SQL */ - { - /* - Calculate length of keys: - 'a\0\0... is the smallest possible string when we have space expand - a\ff\ff... is the biggest possible string - */ - *min_length= ((cs->state & MY_CS_BINSORT) ? 
(size_t) (min_str - min_org) : - res_length); - *max_length= res_length; - do { - *min_str++= 0; - *max_str++= max_sort_char; - } while (min_str != min_end); - return 0; - } - *min_str++= *max_str++ = *ptr; - } - - *min_length= *max_length = (size_t) (min_str - min_org); - while (min_str != min_end) - *min_str++= *max_str++= ' '; /* Because if key compression */ - return 0; -} - - static uint ismbchar_gbk(CHARSET_INFO *cs __attribute__((unused)), const char* p, const char *e) { @@ -9983,7 +9903,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_strnncollsp_gbk, my_strnxfrm_gbk, my_strnxfrmlen_simple, - my_like_range_gbk, + my_like_range_mb, my_wildcmp_mb, my_strcasecmp_mb, my_instr_mb, @@ -10048,7 +9968,7 @@ CHARSET_INFO my_charset_gbk_chinese_ci= 1, /* mbminlen */ 2, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xA967, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_handler, @@ -10080,7 +10000,7 @@ CHARSET_INFO my_charset_gbk_bin= 1, /* mbminlen */ 2, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xFEFE, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_handler, diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 903811e2ab9..e3788fc4dff 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -498,7 +498,9 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), DESCRIPTION Write max key: - for non-Unicode character sets: - just set to 255. + just bfill using max_sort_char if max_sort_char is one byte. + In case when max_sort_char is two bytes, fill with double-byte pairs + and optionally pad with a single space character. - for Unicode character set (utf-8): create a buffer with multibyte representation of the max_sort_char character, and copy it into max_str in a loop. 
@@ -510,12 +512,20 @@ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end) if (!(cs->state & MY_CS_UNICODE)) { - bfill(str, end - str, 255); - return; + if (cs->max_sort_char <= 255) + { + bfill(str, end - str, cs->max_sort_char); + return; + } + buf[0]= cs->max_sort_char >> 8; + buf[1]= cs->max_sort_char & 0xFF; + buflen= 2; + } + else + { + buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, + (uchar*) buf + sizeof(buf)); } - - buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, - (uchar*) buf + sizeof(buf)); DBUG_ASSERT(buflen > 0); do diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index ac426e0d7b5..3f479ffc102 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -304,85 +304,6 @@ static size_t my_strnxfrm_sjis(CHARSET_INFO *cs __attribute__((unused)), } -/* -** Calculate min_str and max_str that ranges a LIKE string. -** Arguments: -** ptr Pointer to LIKE string. -** ptr_length Length of LIKE string. -** escape Escape character in LIKE. (Normally '\'). -** All escape characters should be removed from min_str and max_str -** res_length Length of min_str and max_str. -** min_str Smallest case sensitive string that ranges LIKE. -** Should be space padded to res_length. -** max_str Largest case sensitive string that ranges LIKE. -** Normally padded with the biggest character sort value. -** -** The function should return 0 if ok and 1 if the LIKE string can't be -** optimized ! 
-*/ - -#define max_sort_char ((char) 255) - -static my_bool my_like_range_sjis(CHARSET_INFO *cs __attribute__((unused)), - const char *ptr,size_t ptr_length, - pbool escape, pbool w_one, pbool w_many, - size_t res_length, - char *min_str,char *max_str, - size_t *min_length,size_t *max_length) -{ - const char *end= ptr + ptr_length; - char *min_org=min_str; - char *min_end=min_str+res_length; - size_t charlen= res_length / cs->mbmaxlen; - - for ( ; ptr < end && min_str < min_end && charlen > 0 ; charlen--) - { - if (ismbchar_sjis(cs, ptr, end)) { - *min_str++ = *max_str++ = *ptr++; - if (min_str < min_end) - *min_str++ = *max_str++ = *ptr++; - continue; - } - if (*ptr == escape && ptr+1 < end) { - ptr++; /* Skip escape */ - if (ismbchar_sjis(cs, ptr, end)) - *min_str++ = *max_str++ = *ptr++; - if (min_str < min_end) - *min_str++ = *max_str++ = *ptr++; - continue; - } - if (*ptr == w_one) { /* '_' in SQL */ - *min_str++ = '\0'; /* This should be min char */ - *max_str++ = max_sort_char; - ptr++; - continue; - } - if (*ptr == w_many) - { /* '%' in SQL */ - /* - Calculate length of keys: - 'a\0\0... is the smallest possible string when we have space expand - a\ff\ff... is the biggest possible string - */ - *min_length= ((cs->state & MY_CS_BINSORT) ? 
(size_t) (min_str - min_org) : - res_length); - *max_length= res_length; - do - { - *min_str++= 0; - *max_str++= max_sort_char; - } while (min_str < min_end); - return 0; - } - *min_str++ = *max_str++ = *ptr++; - } - - *min_length= *max_length= (size_t) (min_str - min_org); - while (min_str != min_end) - *min_str++= *max_str++= ' '; /* Because if key compression */ - return 0; -} - /* page 0 0x00A1-0x00DF */ static uint16 tab_sjis_uni0[]={ 0xFF61,0xFF62,0xFF63,0xFF64,0xFF65,0xFF66,0xFF67,0xFF68, @@ -4628,7 +4549,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_strnncollsp_sjis, my_strnxfrm_sjis, my_strnxfrmlen_simple, - my_like_range_sjis, + my_like_range_mb, my_wildcmp_mb, /* wildcmp */ my_strcasecmp_8bit, my_instr_mb, @@ -4694,7 +4615,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci= 1, /* mbminlen */ 2, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xFCFC, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_handler, @@ -4726,7 +4647,7 @@ CHARSET_INFO my_charset_sjis_bin= 1, /* mbminlen */ 2, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xFCFC, /* max_sort_char */ ' ', /* pad char */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_handler, diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index 1a872a92595..4474bd0cf96 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -8567,7 +8567,7 @@ CHARSET_INFO my_charset_ujis_japanese_ci= 1, /* mbminlen */ 3, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xFEFE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ &my_charset_handler, @@ -8600,7 +8600,7 @@ CHARSET_INFO my_charset_ujis_bin= 1, /* mbminlen */ 3, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xFEFE, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ &my_charset_handler, -- cgit v1.2.1