diff options
author | unknown <bar@bar.mysql.r18.ru> | 2003-09-22 17:18:47 +0500 |
---|---|---|
committer | unknown <bar@bar.mysql.r18.ru> | 2003-09-22 17:18:47 +0500 |
commit | de826fdbfbb5093c48ce6de7e9bd8500342a7d99 (patch) | |
tree | dd3aba3e352d64171dfb6fb85f83ff531c272042 /strings | |
parent | 96eb819704adb0cce5319b8603087e45da87b8fd (diff) | |
download | mariadb-git-de826fdbfbb5093c48ce6de7e9bd8500342a7d99.tar.gz |
Bug 1181 fix. LIKE didn't work with UCS2 character set.
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-ucs2.c | 263 |
1 files changed, 255 insertions, 8 deletions
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 31c0f063529..cdcd91b2916 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -90,7 +90,7 @@ static uchar to_upper_ucs2[] = { }; -static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)) , +static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)), my_wc_t * pwc, const uchar *s, const uchar *e) { if (s+2 > e) /* Need 2 characters */ @@ -1018,21 +1018,268 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), return (uint) (end-ptr); } +/* +** Compare a string against a pattern that may contain wildcards. +** Both are decoded one character at a time with my_ucs2_uni(). +** Returns: +** 0 if the string matches the pattern +** -1 if not matched while searching for the text that follows a w_many wildcard +** 1 if not matched for any other reason (character mismatch, leftover input, or a character my_ucs2_uni() could not read) +** When weights is non-NULL, compared characters are mapped through weights[high byte][low byte].sort first; planes whose entry is NULL are left unchanged. +*/ + +static +int my_wildcmp_ucs2(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many, + MY_UNICASE_INFO **weights) +{ + int result= -1; /* Not found, using wildcards */ + my_wc_t s_wc, w_wc; + int scan, plane; + + while (wildstr != wildend) + { + + while (1) + { + scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); + if (scan <= 0) + return 1; + wildstr+= scan; + + if (w_wc == (my_wc_t)escape) + { /* NOTE(review): the character read after escape is still tested against w_many and w_one below, so an escaped wildcard looks like it is treated as a wildcard -- confirm */ + scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); + if (scan <= 0) + return 1; + wildstr+= scan; + } + + if (w_wc == (my_wc_t)w_many) + { + result= 1; /* Found an anchor char */ + break; + } + + scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end); + if (scan <=0) + return 1; + str+= scan; + + if (w_wc == (my_wc_t)w_one) + { + result= 1; /* Found an anchor char */ + } + else + { + if (weights) + { /* Map both characters through the weight table before comparing */ + plane=(s_wc>>8) & 0xFF; + s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; + plane=(w_wc>>8) & 0xFF; + w_wc = weights[plane] ?
weights[plane][w_wc & 0xFF].sort : w_wc; + } + if (s_wc != w_wc) + return 1; /* No match */ + } + if (wildstr == wildend) + return (str != str_end); /* Match if both are at end */ + } + + + if (w_wc == (my_wc_t)w_many) + { /* Found w_many */ + + /* Skip any further w_many and w_one in the pattern; each w_one consumes one character of str */ + for ( ; wildstr != wildend ; ) + { + scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); + if (scan <= 0) + return 1; + wildstr+= scan; + + if (w_wc == (my_wc_t)w_many) + continue; + + if (w_wc == (my_wc_t)w_one) + { + scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end); + if (scan <=0) + return 1; + str+= scan; + continue; + } + break; /* Not a wild character. NOTE(review): wildstr was already advanced past this character above, so the end-of-pattern test and the read that follow this loop act on the NEXT pattern character -- confirm */ + } + + if (wildstr == wildend) + return 0; /* Ok if w_many is last */ + + if (str == str_end) + return -1; + + scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); + if (scan <= 0) + return 1; + wildstr+= scan; + + if (w_wc == (my_wc_t)escape) + { + scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); + if (scan <= 0) + return 1; + wildstr+= scan; + } + + do + { + /* Skip until the first character from wildstr is found */ + while (str != str_end) + { + scan= my_ucs2_uni(cs,&s_wc, (const uchar*)str, (const uchar*)str_end); + if (scan <= 0) + return 1; + str+= scan; + + if (weights) + { + plane=(s_wc>>8) & 0xFF; + s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; + plane=(w_wc>>8) & 0xFF; + w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc; + } + + if (s_wc == w_wc) + break; + } + if (str == str_end) + return -1; + + /* Try to match the rest of the pattern at this position; a result of 0 or -1 is final, 1 means keep scanning */ + result= my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,escape, + w_one,w_many,weights); + if (result <= 0) + return result; + + } while (str != str_end && w_wc != (my_wc_t)w_many); + return -1; + } + } + return (str != str_end ?
1 : 0); +} -static MY_COLLATION_HANDLER my_collation_ci_handler = +static +int my_wildcmp_ucs2_ci(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ /* Wildcard compare that passes the uni_plane table as weights */ + return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend, + escape,w_one,w_many,uni_plane); +} + +static +int my_wildcmp_ucs2_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ /* Wildcard compare with weights == NULL: characters are compared by their decoded value, unmapped */ + return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend, + escape,w_one,w_many,NULL); +} + + +static +int my_strnncoll_ucs2_bin(CHARSET_INFO *cs, + const uchar *s, uint slen, + const uchar *t, uint tlen) +{ /* Compare s[0..slen) and t[0..tlen) character by character by decoded value; result is <0, 0 or >0 */ + int s_res,t_res; + my_wc_t s_wc,t_wc; + const uchar *se=s+slen; + const uchar *te=t+tlen; + + while ( s < se && t < te ) + { + s_res=my_ucs2_uni(cs,&s_wc, s, se); + t_res=my_ucs2_uni(cs,&t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare by char value */ + return ((int)s[0]-(int)t[0]); + } + if ( s_wc != t_wc ) + { + return ((int) s_wc) - ((int) t_wc); + } + + s+=s_res; + t+=t_res; + } + /* One input is exhausted: the difference of the remaining byte counts decides */ + return ( (se-s) - (te-t) ); +} + +static +int my_strcasecmp_ucs2_bin(CHARSET_INFO *cs, const char *s, const char *t) +{ /* Delegates to my_strncasecmp_ucs2() with the longer of the two strlen() lengths. NOTE(review): strlen() stops at the first zero byte, while my_ucs2_uni() consumes 2 bytes per character, so a zero byte may occur inside the text -- confirm callers */ + uint s_len=strlen(s); + uint t_len=strlen(t); + uint len = (s_len > t_len) ?
s_len : t_len; + return my_strncasecmp_ucs2(cs, s, t, len); +} + +static +int my_strnxfrm_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), + uchar *dst, uint dstlen, + const uchar *src, uint srclen) +{ /* Copies min(dstlen,srclen) bytes of src to dst unchanged and returns that count; when dst == src nothing is copied and srclen is returned as passed in */ + if (dst != src) + memcpy(dst,src,srclen= min(dstlen,srclen)); + return srclen; +} + +static +void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), + const uchar *key, uint len,ulong *nr1, ulong *nr2) +{ /* Folds every byte of key[0..len) into nr1[0]; nr2[0] grows by 3 per byte */ + const uchar *pos = key; + + key+= len; + + for (; pos < (uchar*) key ; pos++) + { + nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * + ((uint)*pos)) + (nr1[0] << 8); + nr2[0]+=3; + } +} + + +static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = { my_strnncoll_ucs2, my_strnncoll_ucs2, my_strnxfrm_ucs2, my_like_range_simple, - my_wildcmp_mb, + my_wildcmp_ucs2_ci, my_strcasecmp_ucs2, my_instr_mb, my_hash_sort_ucs2 }; -static MY_CHARSET_HANDLER my_charset_handler= +static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = +{ + my_strnncoll_ucs2_bin, + my_strnncoll_ucs2_bin, + my_strnxfrm_ucs2_bin, + my_like_range_simple, + my_wildcmp_ucs2_bin, + my_strcasecmp_ucs2_bin, + my_instr_mb, + my_hash_sort_ucs2_bin +}; + +static MY_CHARSET_HANDLER my_charset_ucs2_handler= { my_ismbchar_ucs2, /* ismbchar */ my_mbcharlen_ucs2, /* mbcharlen */ @@ -1077,8 +1324,8 @@ CHARSET_INFO my_charset_ucs2_general_ci= 1, /* strxfrm_multiply */ 2, /* mbmaxlen */ 0, - &my_charset_handler, - &my_collation_ci_handler + &my_charset_ucs2_handler, + &my_collation_ucs2_general_ci_handler }; @@ -1100,8 +1347,8 @@ CHARSET_INFO my_charset_ucs2_bin= 1, /* strxfrm_multiply */ 2, /* mbmaxlen */ 0, - &my_charset_handler, - &my_collation_mb_bin_handler + &my_charset_ucs2_handler, + &my_collation_ucs2_bin_handler }; |