summary | refs | log | tree | commit | diff
path: root/strings
diff options
context:
space:
mode:
author unknown <bar@mysql.com> 2005-09-21 23:10:51 +0500
committer unknown <bar@mysql.com> 2005-09-21 23:10:51 +0500
commit 14912660c39354c366419d65899bc95061d63e6e (patch)
tree 519dad549da8beab9748d94f1ae4894caf051d8f /strings
parent 4014d76cd1a7bbab348f2e47d852518e11526777 (diff)
download mariadb-git-14912660c39354c366419d65899bc95061d63e6e.tar.gz
Bug#13046: LIKE pattern matching using prefix index doesn't return correct result

item_cmpfunc.cc:
  Use charset of LIKE to decide whether to use 8bit or Unicode "escape" value.
  But use charset of "escape" to scan escape character.
strings/ctype-xxx.c:
  We cannot reduce "end" pointer using charpos(), because of possible escape
  characters in the string. Limit the loop using count of written characters
  instead.
ctype_like_escape.inc:
  new file
mysql-test/t/ctype_xxx:
mysql-test/r/ctype_xxx:
  Adding test case.

strings/ctype-big5.c:
  Bug#13046: LIKE pattern matching using prefix index doesn't return correct result.
  We cannot change "end" pointer using charpos(), because of possible escape
  characters. Use limit by count of written characters instead.
strings/ctype-cp932.c:
  Bug#13046: LIKE pattern matching using prefix index doesn't return correct result.
  We cannot change "end" pointer using charpos(), because of possible escape
  characters. Use limit by count of written characters instead.
strings/ctype-gbk.c:
  Bug#13046: LIKE pattern matching using prefix index doesn't return correct result.
  We cannot change "end" pointer using charpos(), because of possible escape
  characters. Use limit by count of written characters instead.
strings/ctype-mb.c:
  Bug#13046: LIKE pattern matching using prefix index doesn't return correct result.
  We cannot change "end" pointer using charpos(), because of possible escape
  characters. Use limit by count of written characters instead.
strings/ctype-simple.c:
  Bug#13046: LIKE pattern matching using prefix index doesn't return correct result.
  We cannot change "end" pointer using charpos(), because of possible escape
  characters. Use limit by count of written characters instead.
strings/ctype-sjis.c:
  Bug#13046: LIKE pattern matching using prefix index doesn't return correct result.
  We cannot change "end" pointer using charpos(), because of possible escape
  characters. Use limit by count of written characters instead.
strings/ctype-tis620.c:
  Bug#13046: LIKE pattern matching using prefix index doesn't return correct result.
  We cannot change "end" pointer using charpos(), because of possible escape
  characters. Use limit by count of written characters instead.
strings/ctype-ucs2.c:
  Bug#13046: LIKE pattern matching using prefix index doesn't return correct result.
  We cannot change "end" pointer using charpos(), because of possible escape
  characters. Use limit by count of written characters instead.

mysql-test/t/ctype_big5.test: Adding test case.
mysql-test/t/ctype_cp932.test: Adding test case.
mysql-test/t/ctype_gbk.test: Adding test case.
mysql-test/t/ctype_latin1.test: Adding test case.
mysql-test/t/ctype_sjis.test: Adding test case.
mysql-test/t/ctype_tis620.test: Adding test case.
mysql-test/t/ctype_uca.test: Adding test case.
mysql-test/t/ctype_ucs.test: Adding test case.
mysql-test/t/ctype_ujis.test: Adding test case.
mysql-test/t/ctype_utf8.test: Adding test case.
mysql-test/r/ctype_big5.result: Adding test case.
mysql-test/r/ctype_cp932.result: Adding test case.
mysql-test/r/ctype_gbk.result: Adding test case.
mysql-test/r/ctype_latin1.result: Adding test case.
mysql-test/r/ctype_sjis.result: Adding test case.
mysql-test/r/ctype_tis620.result: Adding test case.
mysql-test/r/ctype_uca.result: Adding test case.
mysql-test/r/ctype_ucs.result: Adding test case.
mysql-test/r/ctype_ujis.result: Adding test case.
mysql-test/r/ctype_utf8.result: Adding test case.
sql/item_cmpfunc.cc: More fixes.
Diffstat (limited to 'strings')
-rw-r--r-- strings/ctype-big5.c 15
-rw-r--r-- strings/ctype-cp932.c 11
-rw-r--r-- strings/ctype-gbk.c 15
-rw-r--r-- strings/ctype-mb.c 17
-rw-r--r-- strings/ctype-simple.c 11
-rw-r--r-- strings/ctype-sjis.c 11
-rw-r--r-- strings/ctype-tis620.c 71
-rw-r--r-- strings/ctype-ucs2.c 6
8 files changed, 36 insertions, 121 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index 76a4e197405..08b0ff009ee 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -392,16 +392,12 @@ static my_bool my_like_range_big5(CHARSET_INFO *cs __attribute__((unused)),
uint res_length, char *min_str,char *max_str,
uint *min_length,uint *max_length)
{
- const char *end;
+ const char *end= ptr + ptr_length;
char *min_org=min_str;
char *min_end=min_str+res_length;
- uint charlen= my_charpos(cs, ptr, ptr+ptr_length, res_length/cs->mbmaxlen);
+ uint charlen= res_length / cs->mbmaxlen;
- if (charlen < ptr_length)
- ptr_length= charlen;
- end= ptr + ptr_length;
-
- for (; ptr != end && min_str != min_end ; ptr++)
+ for (; ptr != end && min_str != min_end && charlen > 0; ptr++, charlen--)
{
if (ptr+1 != end && isbig5code(ptr[0],ptr[1]))
{
@@ -412,7 +408,10 @@ static my_bool my_like_range_big5(CHARSET_INFO *cs __attribute__((unused)),
if (*ptr == escape && ptr+1 != end)
{
ptr++; /* Skip escape */
- *min_str++= *max_str++ = *ptr;
+ if (isbig5code(ptr[0], ptr[1]))
+ *min_str++= *max_str++ = *ptr++;
+ if (min_str < min_end)
+ *min_str++= *max_str++= *ptr;
continue;
}
if (*ptr == w_one) /* '_' in SQL */
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index e476130b706..63f95a28037 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -322,16 +322,13 @@ static my_bool my_like_range_cp932(CHARSET_INFO *cs __attribute__((unused)),
uint res_length, char *min_str,char *max_str,
uint *min_length,uint *max_length)
{
- const char *end;
+ const char *end= ptr + ptr_length;
char *min_org=min_str;
char *min_end=min_str+res_length;
- uint charlen= my_charpos(cs, ptr, ptr+ptr_length, res_length/cs->mbmaxlen);
+ uint charlen= res_length / cs->mbmaxlen;
- if (charlen < ptr_length)
- ptr_length= charlen;
- end= ptr + ptr_length;
-
- while (ptr < end && min_str < min_end) {
+ for ( ; ptr < end && min_str < min_end && charlen > 0 ; charlen--)
+ {
if (ismbchar_cp932(cs, ptr, end)) {
*min_str++ = *max_str++ = *ptr++;
if (min_str < min_end)
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 82c76b8ee96..b5b86984794 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -2705,16 +2705,12 @@ static my_bool my_like_range_gbk(CHARSET_INFO *cs __attribute__((unused)),
uint res_length, char *min_str,char *max_str,
uint *min_length,uint *max_length)
{
- const char *end;
+ const char *end= ptr + ptr_length;
char *min_org=min_str;
char *min_end=min_str+res_length;
- uint charlen= my_charpos(cs, ptr, ptr+ptr_length, res_length/cs->mbmaxlen);
+ uint charlen= res_length / cs->mbmaxlen;
- if (charlen < ptr_length)
- ptr_length= charlen;
- end= ptr + ptr_length;
-
- for (; ptr != end && min_str != min_end ; ptr++)
+ for (; ptr != end && min_str != min_end && charlen > 0; ptr++, charlen--)
{
if (ptr+1 != end && isgbkcode(ptr[0],ptr[1]))
{
@@ -2725,7 +2721,10 @@ static my_bool my_like_range_gbk(CHARSET_INFO *cs __attribute__((unused)),
if (*ptr == escape && ptr+1 != end)
{
ptr++; /* Skip escape */
- *min_str++= *max_str++ = *ptr;
+ if (isgbkcode(ptr[0], ptr[1]))
+ *min_str++= *max_str++ = *ptr;
+ if (min_str < min_end)
+ *min_str++= *max_str++= *ptr;
continue;
}
if (*ptr == w_one) /* '_' in SQL */
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 4b22f158284..eb032759d25 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -502,17 +502,13 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
char *min_str,char *max_str,
uint *min_length,uint *max_length)
{
- const char *end;
+ const char *end= ptr + ptr_length;
char *min_org= min_str;
char *min_end= min_str + res_length;
char *max_end= max_str + res_length;
- uint charlen= my_charpos(cs, ptr, ptr+ptr_length, res_length/cs->mbmaxlen);
+ uint charlen= res_length / cs->mbmaxlen;
- if (charlen < ptr_length)
- ptr_length= charlen;
- end= ptr + ptr_length;
-
- for (; ptr != end && min_str != min_end ; ptr++)
+ for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
{
if (*ptr == escape && ptr+1 != end)
{
@@ -522,14 +518,8 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
}
if (*ptr == w_one || *ptr == w_many) /* '_' and '%' in SQL */
{
- charlen= my_charpos(cs, min_org, min_str, res_length/cs->mbmaxlen);
-
- if (charlen < (uint) (min_str - min_org))
- min_str= min_org + charlen;
-
/* Write min key */
*min_length= (uint) (min_str - min_org);
- *max_length= res_length;
do
{
*min_str++= (char) cs->min_sort_char;
@@ -540,6 +530,7 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
representation of the max_sort_char character,
and copy it into max_str in a loop.
*/
+ *max_length= res_length;
pad_max_char(cs, max_str, max_end);
return 0;
}
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index af673b78254..efddab621f2 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -997,17 +997,12 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
char *min_str,char *max_str,
uint *min_length,uint *max_length)
{
- const char *end;
+ const char *end= ptr + ptr_length;
char *min_org=min_str;
char *min_end=min_str+res_length;
-#ifdef USE_MB
- uint charlen= my_charpos(cs, ptr, ptr+ptr_length, res_length/cs->mbmaxlen);
- if (charlen < ptr_length)
- ptr_length= charlen;
-#endif
- end= ptr + ptr_length;
+ uint charlen= res_length / cs->mbmaxlen;
- for (; ptr != end && min_str != min_end ; ptr++)
+ for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
{
if (*ptr == escape && ptr+1 != end)
{
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 4342fc670df..da79f1796b8 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -322,16 +322,13 @@ static my_bool my_like_range_sjis(CHARSET_INFO *cs __attribute__((unused)),
uint res_length, char *min_str,char *max_str,
uint *min_length,uint *max_length)
{
- const char *end;
+ const char *end= ptr + ptr_length;
char *min_org=min_str;
char *min_end=min_str+res_length;
- uint charlen= my_charpos(cs, ptr, ptr+ptr_length, res_length/cs->mbmaxlen);
+ uint charlen= res_length / cs->mbmaxlen;
- if (charlen < ptr_length)
- ptr_length= charlen;
- end= ptr + ptr_length;
-
- while (ptr < end && min_str < min_end) {
+ for ( ; ptr < end && min_str < min_end && charlen > 0 ; charlen--)
+ {
if (ismbchar_sjis(cs, ptr, end)) {
*min_str++ = *max_str++ = *ptr++;
if (min_str < min_end)
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 208168bb946..dcb0e0525b4 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -641,71 +641,6 @@ int my_strnxfrm_tis620(CHARSET_INFO *cs __attribute__((unused)),
}
-
-/*
- Convert SQL LIKE string to C string
-
- Arg: String, its length, escape character, resource length,
- minimal string and maximum string
- Ret: Always 0
-
- IMPLEMENTATION
- We just copy this function from opt_range.cc. No need to convert to
- thai2sortable string. min_str and max_str will be use for comparison and
- converted there.
-
- RETURN VALUES
- 0
-*/
-
-#define max_sort_chr ((char) 255)
-
-static
-my_bool my_like_range_tis620(CHARSET_INFO *cs __attribute__((unused)),
- const char *ptr, uint ptr_length,
- pbool escape, pbool w_one, pbool w_many,
- uint res_length, char *min_str, char *max_str,
- uint *min_length, uint *max_length)
-{
- const char *end=ptr+ptr_length;
- char *min_org=min_str;
- char *min_end=min_str+res_length;
-
- for (; ptr != end && min_str != min_end ; ptr++)
- {
- if (*ptr == escape && ptr+1 != end)
- {
- ptr++; /* Skip escape */
- *min_str++ = *max_str++ = *ptr;
- continue;
- }
- if (*ptr == w_one) /* '_' in SQL */
- {
- *min_str++='\0'; /* This should be min char */
- *max_str++=max_sort_chr;
- continue;
- }
- if (*ptr == w_many) /* '%' in SQL */
- {
- *min_length= (uint) (min_str - min_org);
- *max_length=res_length;
- do
- {
- *min_str++ = 0;
- *max_str++ = max_sort_chr;
- } while (min_str != min_end);
- return 0;
- }
- *min_str++= *max_str++ = *ptr;
- }
- *min_length= *max_length = (uint) (min_str - min_org);
-
- while (min_str != min_end)
- *min_str++ = *max_str++ = ' '; /* Because of key compression */
- return 0;
-}
-
-
static unsigned short cs_to_uni[256]={
0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
@@ -914,7 +849,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncoll_tis620,
my_strnncollsp_tis620,
my_strnxfrm_tis620,
- my_like_range_tis620,
+ my_like_range_simple,
my_wildcmp_8bit, /* wildcmp */
my_strcasecmp_8bit,
my_instr_simple, /* QQ: To be fixed */
@@ -974,7 +909,7 @@ CHARSET_INFO my_charset_tis620_thai_ci=
1, /* mbminlen */
1, /* mbmaxlen */
0, /* min_sort_char */
- 0, /* max_sort_char */
+ 255, /* max_sort_char */
0, /* escape_with_backslash_is_dangerous */
&my_charset_handler,
&my_collation_ci_handler
@@ -1002,7 +937,7 @@ CHARSET_INFO my_charset_tis620_bin=
1, /* mbminlen */
1, /* mbmaxlen */
0, /* min_sort_char */
- 0, /* max_sort_char */
+ 255, /* max_sort_char */
0, /* escape_with_backslash_is_dangerous */
&my_charset_handler,
&my_collation_8bit_bin_handler
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 2761e781724..56c05635300 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1426,10 +1426,12 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
const char *end=ptr+ptr_length;
char *min_org=min_str;
char *min_end=min_str+res_length;
+ uint charlen= res_length / cs->mbmaxlen;
- for (; ptr + 1 < end && min_str + 1 < min_end ; ptr+=2)
+ for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
+ ; ptr+=2, charlen--)
{
- if (ptr[0] == '\0' && ptr[1] == escape && ptr+2 < end)
+ if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
{
ptr+=2; /* Skip escape */
*min_str++= *max_str++ = ptr[0];