diff options
author | unknown <bar@bar.mysql.r18.ru> | 2003-09-19 15:18:19 +0500 |
---|---|---|
committer | unknown <bar@bar.mysql.r18.ru> | 2003-09-19 15:18:19 +0500 |
commit | 44bffa0b0564a0d28558202d6cebbbea5eee1729 (patch) | |
tree | a720cfa7596cc981bc05e41c0c8c4a6103870eaa | |
parent | 4c63804846d1530b602a74ff30ad26df7645a94b (diff) | |
download | mariadb-git-44bffa0b0564a0d28558202d6cebbbea5eee1729.tar.gz |
Fixed a bug where multibyte character sets did not honor
multibyte sequence boundaries in the LIKE and LOCATE functions
when a "binary" collation was used. The comparison was done
as if the strings were plain binary strings, without taking
the character set into account.
-rw-r--r-- | include/m_ctype.h | 14 | ||||
-rw-r--r-- | mysql-test/r/ctype_ujis.result | 58 | ||||
-rw-r--r-- | mysql-test/r/have_ujis.require | 2 | ||||
-rw-r--r-- | mysql-test/t/ctype_ujis.test | 26 | ||||
-rw-r--r-- | mysys/charset.c | 2 | ||||
-rw-r--r-- | sql/item_func.cc | 54 | ||||
-rw-r--r-- | strings/ctype-big5.c | 3 | ||||
-rw-r--r-- | strings/ctype-bin.c | 43 | ||||
-rw-r--r-- | strings/ctype-czech.c | 1 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 7 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 9 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 3 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 3 | ||||
-rw-r--r-- | strings/ctype-mb.c | 223 | ||||
-rw-r--r-- | strings/ctype-simple.c | 39 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 3 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 3 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 3 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 3 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 3 | ||||
-rw-r--r-- | strings/ctype-win1250ch.c | 1 |
21 files changed, 437 insertions, 66 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index 8116058d687..603bb3cc2e5 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -115,12 +115,17 @@ typedef struct my_collation_handler_st int (*strcasecmp)(struct charset_info_st *, const char *, const char *); + int (*instr)(struct charset_info_st *, + const char *big, uint b_length, + const char *small, uint s_length); + /* Hash calculation */ void (*hash_sort)(struct charset_info_st *cs, const uchar *key, uint len, ulong *nr1, ulong *nr2); } MY_COLLATION_HANDLER; -extern MY_COLLATION_HANDLER my_collation_bin_handler; +extern MY_COLLATION_HANDLER my_collation_mb_bin_handler; +extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler; extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler; @@ -243,6 +248,10 @@ extern void my_hash_sort_simple(CHARSET_INFO *cs, extern uint my_lengthsp_8bit(CHARSET_INFO *cs, const char *ptr, uint length); +extern int my_instr_simple(struct charset_info_st *, + const char *big, uint b_length, + const char *small, uint s_length); + /* Functions for 8bit */ extern void my_caseup_str_8bit(CHARSET_INFO *, char *); @@ -307,6 +316,9 @@ int my_wildcmp_mb(CHARSET_INFO *, int escape, int w_one, int w_many); uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e); uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos); +int my_instr_mb(struct charset_info_st *, + const char *big, uint b_length, + const char *small, uint s_length); extern my_bool my_parse_charset_xml(const char *bug, uint len, diff --git a/mysql-test/r/ctype_ujis.result b/mysql-test/r/ctype_ujis.result index ffb305a81cf..1730b17eaed 100644 --- a/mysql-test/r/ctype_ujis.result +++ b/mysql-test/r/ctype_ujis.result @@ -1,4 +1,5 @@ drop table if exists t1; +set names ujis; create table t1 (c text character set ujis); insert into t1 values (0xa4a2),(0xa4a3); select hex(left(c,1)) from t1 group by c; @@ -6,3 +7,60 @@ hex(left(c,1)) A4A2 A4A3 drop table t1; +select 
locate(0xa2a1,0xa1a2a1a3); +locate(0xa2a1,0xa1a2a1a3) +2 +select locate(_ujis 0xa2a1,_ujis 0xa1a2a1a3); +locate(_ujis 0xa2a1,_ujis 0xa1a2a1a3) +0 +select locate(_ujis 0xa2a1,_ujis 0xa1a2a1a3 collate ujis_bin); +locate(_ujis 0xa2a1,_ujis 0xa1a2a1a3 collate ujis_bin) +0 +select locate('he','hello'); +locate('he','hello') +1 +select locate('he','hello',2); +locate('he','hello',2) +0 +select locate('lo','hello',2); +locate('lo','hello',2) +4 +select locate('HE','hello'); +locate('HE','hello') +1 +select locate('HE','hello',2); +locate('HE','hello',2) +0 +select locate('LO','hello',2); +locate('LO','hello',2) +4 +select locate('HE','hello' collate ujis_bin); +locate('HE','hello' collate ujis_bin) +0 +select locate('HE','hello' collate ujis_bin,2); +locate('HE','hello' collate ujis_bin,2) +0 +select locate('LO','hello' collate ujis_bin,2); +locate('LO','hello' collate ujis_bin,2) +0 +select locate(_ujis 0xa1a3,_ujis 0xa1a2a1a3); +locate(_ujis 0xa1a3,_ujis 0xa1a2a1a3) +2 +select 0xa1a2a1a3 like concat(_binary'%',0xa2a1,_binary'%'); +0xa1a2a1a3 like concat(_binary'%',0xa2a1,_binary'%') +1 +select _ujis 0xa1a2a1a3 like concat(_ujis'%',_ujis 0xa2a1, _ujis'%'); +_ujis 0xa1a2a1a3 like concat(_ujis'%',_ujis 0xa2a1, _ujis'%') +0 +select _ujis 0xa1a2a1a3 like concat(_ujis'%',_ujis 0xa2a1, _ujis'%') collate ujis_bin; +_ujis 0xa1a2a1a3 like concat(_ujis'%',_ujis 0xa2a1, _ujis'%') collate ujis_bin +0 +select 'a' like 'a'; +'a' like 'a' +1 +select 'A' like 'a'; +'A' like 'a' +1 +select 'A' like 'a' collate ujis_bin; +'A' like 'a' collate ujis_bin +0 diff --git a/mysql-test/r/have_ujis.require b/mysql-test/r/have_ujis.require index b4de2234ec7..43a309ad74e 100644 --- a/mysql-test/r/have_ujis.require +++ b/mysql-test/r/have_ujis.require @@ -1,2 +1,2 @@ Collation Charset Id Default Compiled Sortlen -ujis_japanese_ci ujis 12 Yes Yes 0 +ujis_japanese_ci ujis 12 Yes Yes 1 diff --git a/mysql-test/t/ctype_ujis.test b/mysql-test/t/ctype_ujis.test index e41caf55948..bcf6507b4c7 100644 --- 
a/mysql-test/t/ctype_ujis.test +++ b/mysql-test/t/ctype_ujis.test @@ -7,6 +7,8 @@ drop table if exists t1; --enable_warnings +set names ujis; + # # Test problem with LEFT() # @@ -15,3 +17,27 @@ create table t1 (c text character set ujis); insert into t1 values (0xa4a2),(0xa4a3); select hex(left(c,1)) from t1 group by c; drop table t1; + +# +# +# +select locate(0xa2a1,0xa1a2a1a3); +select locate(_ujis 0xa2a1,_ujis 0xa1a2a1a3); +select locate(_ujis 0xa2a1,_ujis 0xa1a2a1a3 collate ujis_bin); +select locate('he','hello'); +select locate('he','hello',2); +select locate('lo','hello',2); +select locate('HE','hello'); +select locate('HE','hello',2); +select locate('LO','hello',2); +select locate('HE','hello' collate ujis_bin); +select locate('HE','hello' collate ujis_bin,2); +select locate('LO','hello' collate ujis_bin,2); +select locate(_ujis 0xa1a3,_ujis 0xa1a2a1a3); + +select 0xa1a2a1a3 like concat(_binary'%',0xa2a1,_binary'%'); +select _ujis 0xa1a2a1a3 like concat(_ujis'%',_ujis 0xa2a1, _ujis'%'); +select _ujis 0xa1a2a1a3 like concat(_ujis'%',_ujis 0xa2a1, _ujis'%') collate ujis_bin; +select 'a' like 'a'; +select 'A' like 'a'; +select 'A' like 'a' collate ujis_bin; diff --git a/mysys/charset.c b/mysys/charset.c index 58ce8f5b2fe..e8406173b33 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -119,7 +119,7 @@ static void simple_cs_init_functions(CHARSET_INFO *cs) if (cs->state & MY_CS_BINSORT) { - cs->coll= &my_collation_bin_handler; + cs->coll= &my_collation_8bit_bin_handler; } else { diff --git a/sql/item_func.cc b/sql/item_func.cc index fe419745b60..35df0dbe40d 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -1153,7 +1153,6 @@ longlong Item_func_locate::val_int() { String *a=args[0]->val_str(&value1); String *b=args[1]->val_str(&value2); - bool binary_cmp= (cmp_collation.collation->state & MY_CS_BINSORT) ? 
1 : 0; if (!a || !b) { null_value=1; @@ -1161,55 +1160,26 @@ longlong Item_func_locate::val_int() } null_value=0; uint start=0; -#ifdef USE_MB uint start0=0; -#endif + int ind; + if (arg_count == 3) { - start=(uint) args[2]->val_int()-1; -#ifdef USE_MB - if (use_mb(cmp_collation.collation)) - { - start0=start; - if (!binary_cmp) - start=a->charpos(start); - } -#endif + start0= start =(uint) args[2]->val_int()-1; + start=a->charpos(start); + if (start > a->length() || start+b->length() > a->length()) return 0; } + if (!b->length()) // Found empty string at start return (longlong) (start+1); -#ifdef USE_MB - if (use_mb(cmp_collation.collation) && !binary_cmp) - { - const char *ptr=a->ptr()+start; - const char *search=b->ptr(); - const char *strend = ptr+a->length(); - const char *end=strend-b->length()+1; - const char *search_end=search+b->length(); - register uint32 l; - while (ptr < end) - { - if (*ptr == *search) - { - register char *i,*j; - i=(char*) ptr+1; j=(char*) search+1; - while (j != search_end) - if (*i++ != *j++) goto skipp; - return (longlong) start0+1; - } - skipp: - if ((l=my_ismbchar(cmp_collation.collation,ptr,strend))) - ptr+=l; - else ++ptr; - ++start0; - } - return 0; - } -#endif /* USE_MB */ - return (longlong) (binary_cmp ? a->strstr(*b,start) : - (a->strstr_case(*b,start)))+1; + + ind= cmp_collation.collation->coll->instr(cmp_collation.collation, + a->ptr()+start, a->length()-start, + b->ptr(), b->length()); + + return (longlong) (ind >= 0 ? 
ind + start0 + 1 : ind + 1); } diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index ddcec46474d..878493bc31f 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6234,6 +6234,7 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler = my_like_range_big5, my_wildcmp_mb, my_strcasecmp_mb, + my_instr_mb, my_hash_sort_simple }; @@ -6305,7 +6306,7 @@ CHARSET_INFO my_charset_big5_bin= 2, /* mbmaxlen */ 0, &my_charset_big5_handler, - &my_collation_bin_handler + &my_collation_mb_bin_handler }; diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 9441268739a..3f74f514c48 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -262,8 +262,46 @@ static int my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)), return len; } +static +int my_instr_bin(CHARSET_INFO *cs __attribute__((unused)), + const char *big, uint b_length, + const char *small, uint s_length) +{ + register const uchar *str, *search, *end, *search_end; + + if (s_length <= b_length) + { + if (!s_length) + return 0; // Empty string is always found + + str= (const uchar*) big; + search= (const uchar*) small; + end= (const uchar*) big+b_length-s_length+1; + search_end= (const uchar*) small + s_length; + +skipp: + while (str != end) + { + if ( (*str++) == (*search)) + { + register const uchar *i,*j; + + i= str; + j= search+1; + + while (j != search_end) + if ((*i++) != (*j++)) + goto skipp; + + return (int) (str- (const uchar*)big) -1; + } + } + } + return -1; +} + -MY_COLLATION_HANDLER my_collation_bin_handler = +MY_COLLATION_HANDLER my_collation_8bit_bin_handler = { my_strnncoll_binary, my_strnncollsp_binary, @@ -271,6 +309,7 @@ MY_COLLATION_HANDLER my_collation_bin_handler = my_like_range_simple, my_wildcmp_bin, my_strcasecmp_bin, + my_instr_bin, my_hash_sort_bin }; @@ -317,5 +356,5 @@ CHARSET_INFO my_charset_bin = 1, /* mbmaxlen */ (char) 255, /* max_sort_char */ &my_charset_handler, - &my_collation_bin_handler + &my_collation_8bit_bin_handler }; diff --git 
a/strings/ctype-czech.c b/strings/ctype-czech.c index 2398bc33d45..b2e4f1886ed 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -612,6 +612,7 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler = my_like_range_czech, my_wildcmp_8bit, my_strcasecmp_8bit, + my_instr_simple, my_hash_sort_simple, }; diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index bb3e66a8c6f..d47c4268642 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8637,12 +8637,13 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ci_handler = { - my_strnncoll_simple,/* strnncoll */ + my_strnncoll_simple, /* strnncoll */ my_strnncollsp_simple, my_strnxfrm_simple, /* strnxfrm */ - my_like_range_simple,/* like_range */ + my_like_range_simple, /* like_range */ my_wildcmp_mb, /* wildcmp */ my_strcasecmp_mb, + my_instr_mb, my_hash_sort_simple, }; @@ -8714,7 +8715,7 @@ CHARSET_INFO my_charset_euckr_bin= 2, /* mbmaxlen */ 0, &my_charset_handler, - &my_collation_bin_handler + &my_collation_mb_bin_handler }; #endif diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index b8cee35b186..d429fa34eea 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5687,12 +5687,13 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ci_handler = { - my_strnncoll_simple,/* strnncoll */ + my_strnncoll_simple, /* strnncoll */ my_strnncollsp_simple, my_strnxfrm_simple, /* strnxfrm */ - my_like_range_simple,/* like_range */ + my_like_range_simple, /* like_range */ my_wildcmp_mb, /* wildcmp */ - my_strcasecmp_mb, + my_strcasecmp_mb, /* instr */ + my_instr_mb, my_hash_sort_simple, }; @@ -5763,7 +5764,7 @@ CHARSET_INFO my_charset_gb2312_bin= 2, /* mbmaxlen */ 0, &my_charset_handler, - &my_collation_bin_handler + &my_collation_mb_bin_handler }; #endif diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 4a84eabaf8c..fa7aa175103 100644 --- 
a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -9890,6 +9890,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_like_range_gbk, my_wildcmp_mb, my_strcasecmp_mb, + my_instr_mb, my_hash_sort_simple, }; @@ -9960,7 +9961,7 @@ CHARSET_INFO my_charset_gbk_bin= 2, /* mbmaxlen */ 0, &my_charset_handler, - &my_collation_bin_handler + &my_collation_mb_bin_handler }; diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index edbd350f364..a8a5329f844 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -390,6 +390,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler= my_like_range_simple, my_wildcmp_8bit, my_strcasecmp_8bit, + my_instr_simple, my_hash_sort_simple }; @@ -435,6 +436,6 @@ CHARSET_INFO my_charset_latin1_bin= 1, /* mbmaxlen */ 0, &my_charset_handler, - &my_collation_bin_handler + &my_collation_8bit_bin_handler }; diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 3d1abe95675..03323b3d3a1 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -126,11 +126,7 @@ int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t) #define INC_PTR(cs,A,B) A+=((use_mb_flag && \ my_ismbchar(cs,A,B)) ? my_ismbchar(cs,A,B) : 1) -#ifdef LIKE_CMP_TOUPPER -#define likeconv(s,A) (uchar) my_toupper(s,A) -#else #define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)] -#endif int my_wildcmp_mb(CHARSET_INFO *cs, const char *str,const char *str_end, @@ -278,5 +274,224 @@ uint my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)), return b-b0; } +int my_instr_mb(CHARSET_INFO *cs, + const char *big, uint b_length, + const char *small, uint s_length) +{ + register const char *end; + int res= 0; + + if (s_length <= b_length) + { + if (!s_length) + return 0; // Empty string is always found + + end= big+b_length-s_length+1; + + while (big < end) + { + int mblen; + + if (!cs->coll->strnncoll(cs, (unsigned char*) big, s_length, + (unsigned char*) small, s_length)) + return res; + + mblen= (mblen= my_ismbchar(cs, big, end)) ? 
mblen : 1; + big+= mblen; + b_length-= mblen; + res++; + } + } + return -1; +} + +/* BINARY collations handlers for MB charsets */ + +static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), + const uchar *s, uint slen, + const uchar *t, uint tlen) +{ + int cmp= memcmp(s,t,min(slen,tlen)); + return cmp ? cmp : (int) (slen - tlen); +} + +static int my_strnncollsp_mb_bin(CHARSET_INFO * cs, + const uchar *s, uint slen, + const uchar *t, uint tlen) +{ + int len, cmp; + + for ( ; slen && my_isspace(cs, s[slen-1]) ; slen--); + for ( ; tlen && my_isspace(cs, t[tlen-1]) ; tlen--); + + len = ( slen > tlen ) ? tlen : slen; + + cmp= memcmp(s,t,len); + return cmp ? cmp : (int) (slen - tlen); +} + +static int my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)), + uchar * dest, uint len, + const uchar *src, + uint srclen __attribute__((unused))) +{ + if (dest != src) + memcpy(dest,src,len= min(len,srclen)); + return len; +} + + +static int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), + const char *s, const char *t) +{ + return strcmp(s,t); +} + +static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), + const uchar *key, uint len,ulong *nr1, ulong *nr2) +{ + const uchar *pos = key; + + key+= len; + + for (; pos < (uchar*) key ; pos++) + { + nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * + ((uint)*pos)) + (nr1[0] << 8); + nr2[0]+=3; + } +} + +static int my_wildcmp_mb_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + int result= -1; /* Not found, using wildcards */ + + bool use_mb_flag=use_mb(cs); + + while (wildstr != wildend) + { + while (*wildstr != w_many && *wildstr != w_one) + { + int l; + if (*wildstr == escape && wildstr+1 != wildend) + wildstr++; + if (use_mb_flag && + (l = my_ismbchar(cs, wildstr, wildend))) + { + if (str+l > str_end || memcmp(str, wildstr, l) != 0) + return 1; + str += l; + wildstr += l; + } + 
else + if (str == str_end || *wildstr++ != *str++) + return(1); /* No match */ + if (wildstr == wildend) + return (str != str_end); /* Match if both are at end */ + result=1; /* Found an anchor char */ + } + if (*wildstr == w_one) + { + do + { + if (str == str_end) /* Skip one char if possible */ + return (result); + INC_PTR(cs,str,str_end); + } while (++wildstr < wildend && *wildstr == w_one); + if (wildstr == wildend) + break; + } + if (*wildstr == w_many) + { /* Found w_many */ + uchar cmp; + const char* mb = wildstr; + int mblen=0; + + wildstr++; + /* Remove any '%' and '_' from the wild search string */ + for (; wildstr != wildend ; wildstr++) + { + if (*wildstr == w_many) + continue; + if (*wildstr == w_one) + { + if (str == str_end) + return (-1); + INC_PTR(cs,str,str_end); + continue; + } + break; /* Not a wild character */ + } + if (wildstr == wildend) + return(0); /* Ok if w_many is last */ + if (str == str_end) + return -1; + + if ((cmp= *wildstr) == escape && wildstr+1 != wildend) + cmp= *++wildstr; + + mb=wildstr; + LINT_INIT(mblen); + if (use_mb_flag) + mblen = my_ismbchar(cs, wildstr, wildend); + INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */ + do + { + if (use_mb_flag) + { + for (;;) + { + if (str >= str_end) + return -1; + if (mblen) + { + if (str+mblen <= str_end && memcmp(str, mb, mblen) == 0) + { + str += mblen; + break; + } + } + else if (!my_ismbchar(cs, str, str_end) && *str == cmp) + { + str++; + break; + } + INC_PTR(cs,str, str_end); + } + } + else + { + while (str != str_end && *str != cmp) + str++; + if (str++ == str_end) return (-1); + } + { + int tmp=my_wildcmp_mb(cs,str,str_end,wildstr,wildend,escape,w_one,w_many); + if (tmp <= 0) + return (tmp); + } + } while (str != str_end && wildstr[0] != w_many); + return(-1); + } + } + return (str != str_end ? 
1 : 0); +} + + +MY_COLLATION_HANDLER my_collation_mb_bin_handler = +{ + my_strnncoll_mb_bin, + my_strnncollsp_mb_bin, + my_strnxfrm_mb_bin, + my_like_range_simple, + my_wildcmp_mb_bin, + my_strcasecmp_mb_bin, + my_instr_mb, + my_hash_sort_mb_bin +}; + #endif diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index ca0097579bd..152980dd305 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -1030,6 +1030,44 @@ uint my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)), } +int my_instr_simple(CHARSET_INFO *cs, + const char *big, uint b_length, + const char *small, uint s_length) +{ + register const uchar *str, *search, *end, *search_end; + + if (s_length <= b_length) + { + if (!s_length) + return 0; // Empty string is always found + + str= (const uchar*) big; + search= (const uchar*) small; + end= (const uchar*) big+b_length-s_length+1; + search_end= (const uchar*) small + s_length; + +skipp: + while (str != end) + { + if (cs->sort_order[*str++] == cs->sort_order[*search]) + { + register const uchar *i,*j; + + i= str; + j= search+1; + + while (j != search_end) + if (cs->sort_order[*i++] != cs->sort_order[*j++]) + goto skipp; + + return (int) (str- (const uchar*)big) -1; + } + } + } + return -1; +} + + MY_CHARSET_HANDLER my_charset_8bit_handler= { NULL, /* ismbchar */ @@ -1063,5 +1101,6 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler = my_like_range_simple, my_wildcmp_8bit, my_strcasecmp_8bit, + my_instr_simple, my_hash_sort_simple }; diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 9827c19e7fb..f302e678b9f 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4477,6 +4477,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_like_range_sjis, my_wildcmp_mb, /* wildcmp */ my_strcasecmp_8bit, + my_instr_mb, my_hash_sort_simple, }; @@ -4547,7 +4548,7 @@ CHARSET_INFO my_charset_sjis_bin= 2, /* mbmaxlen */ 0, &my_charset_handler, - &my_collation_bin_handler + &my_collation_mb_bin_handler }; #endif 
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 8a6e00b973a..a4d8a7d1f79 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -710,6 +710,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_like_range_tis620, my_wildcmp_8bit, /* wildcmp */ my_strcasecmp_8bit, + NULL, my_hash_sort_simple, }; @@ -781,7 +782,7 @@ CHARSET_INFO my_charset_tis620_bin= 1, /* mbmaxlen */ 0, &my_charset_handler, - &my_collation_bin_handler + &my_collation_8bit_bin_handler }; diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index a20502c65d4..31c0f063529 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1028,6 +1028,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_like_range_simple, my_wildcmp_mb, my_strcasecmp_ucs2, + my_instr_mb, my_hash_sort_ucs2 }; @@ -1100,7 +1101,7 @@ CHARSET_INFO my_charset_ucs2_bin= 2, /* mbmaxlen */ 0, &my_charset_handler, - &my_collation_bin_handler + &my_collation_mb_bin_handler }; diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index 5ef6c1b7486..29375aca727 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -8434,6 +8434,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_like_range_simple,/* like_range */ my_wildcmp_mb, /* wildcmp */ my_strcasecmp_mb, + my_instr_mb, my_hash_sort_simple, }; @@ -8504,7 +8505,7 @@ CHARSET_INFO my_charset_ujis_bin= 3, /* mbmaxlen */ 0, &my_charset_handler, - &my_collation_bin_handler + &my_collation_mb_bin_handler }; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 450c2d7aa93..3ede1aa26f6 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1959,6 +1959,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_like_range_simple, my_wildcmp_mb, my_strcasecmp_utf8, + my_instr_mb, my_hash_sort_utf8 }; @@ -2031,7 +2032,7 @@ CHARSET_INFO my_charset_utf8_bin= 3, /* mbmaxlen */ 0, &my_charset_handler, - &my_collation_bin_handler + &my_collation_mb_bin_handler }; diff --git 
a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index 12a8edc4a8f..60a5737009f 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -652,6 +652,7 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler = my_like_range_win1250ch, my_wildcmp_8bit, my_strcasecmp_8bit, + my_instr_simple, my_hash_sort_simple }; |