summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
author unknown <bar@mysql.com> 2004-10-18 15:23:24 +0500
committer unknown <bar@mysql.com> 2004-10-18 15:23:24 +0500
commit 5267ec8a5ac0ce18857ace639382e06631e0a62f (patch)
tree 8f676b681ea799bf7fbdb31b3bfb29430f1640d6 /strings
parent 2310f00af2bedf78a98836ab953f7dfc71654d3d (diff)
download mariadb-git-5267ec8a5ac0ce18857ace639382e06631e0a62f.tar.gz
Bug #6040 can't retrieve records with umlaut characters in case insensitive manner
Diffstat (limited to 'strings')
-rw-r--r-- strings/ctype-ucs2.c 166
-rw-r--r-- strings/ctype-utf8.c 168
2 files changed, 171 insertions, 163 deletions
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 3247e1d7424..851c2044f47 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1231,172 +1231,14 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
-/*
-** Compare string against string with wildcard
-** 0 if matched
-** -1 if not matched with wildcard
-** 1 if matched with wildcard
-*/
-
-static
-int my_wildcmp_ucs2(CHARSET_INFO *cs,
- const char *str,const char *str_end,
- const char *wildstr,const char *wildend,
- int escape, int w_one, int w_many,
- MY_UNICASE_INFO **weights)
-{
- int result= -1; /* Not found, using wildcards */
- my_wc_t s_wc, w_wc;
- int scan, plane;
-
- while (wildstr != wildend)
- {
-
- while (1)
- {
- scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
- (const uchar*)wildend);
- if (scan <= 0)
- return 1;
-
- if (w_wc == (my_wc_t)escape)
- {
- wildstr+= scan;
- scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
- (const uchar*)wildend);
- if (scan <= 0)
- return 1;
- }
-
- if (w_wc == (my_wc_t)w_many)
- {
- result= 1; /* Found an anchor char */
- break;
- }
-
- wildstr+= scan;
- scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end);
- if (scan <=0)
- return 1;
- str+= scan;
-
- if (w_wc == (my_wc_t)w_one)
- {
- result= 1; /* Found an anchor char */
- }
- else
- {
- if (weights)
- {
- plane=(s_wc>>8) & 0xFF;
- s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
- plane=(w_wc>>8) & 0xFF;
- w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
- }
- if (s_wc != w_wc)
- return 1; /* No match */
- }
- if (wildstr == wildend)
- return (str != str_end); /* Match if both are at end */
- }
-
-
- if (w_wc == (my_wc_t)w_many)
- { /* Found w_many */
-
- /* Remove any '%' and '_' from the wild search string */
- for ( ; wildstr != wildend ; )
- {
- scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
- (const uchar*)wildend);
- if (scan <= 0)
- return 1;
-
- if (w_wc == (my_wc_t)w_many)
- {
- wildstr+= scan;
- continue;
- }
-
- if (w_wc == (my_wc_t)w_one)
- {
- wildstr+= scan;
- scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str,
- (const uchar*)str_end);
- if (scan <=0)
- return 1;
- str+= scan;
- continue;
- }
- break; /* Not a wild character */
- }
-
- if (wildstr == wildend)
- return 0; /* Ok if w_many is last */
-
- if (str == str_end)
- return -1;
-
- scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
- (const uchar*)wildend);
- if (scan <= 0)
- return 1;
-
- if (w_wc == (my_wc_t)escape)
- {
- wildstr+= scan;
- scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
- (const uchar*)wildend);
- if (scan <= 0)
- return 1;
- }
-
- while (1)
- {
- /* Skip until the first character from wildstr is found */
- while (str != str_end)
- {
- scan= my_ucs2_uni(cs,&s_wc, (const uchar*)str,
- (const uchar*)str_end);
- if (scan <= 0)
- return 1;
- if (weights)
- {
- plane=(s_wc>>8) & 0xFF;
- s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
- plane=(w_wc>>8) & 0xFF;
- w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
- }
-
- if (s_wc == w_wc)
- break;
- str+= scan;
- }
- if (str == str_end)
- return -1;
-
- result= my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,escape,
- w_one,w_many,weights);
-
- if (result <= 0)
- return result;
-
- str+= scan;
- }
- }
- }
- return (str != str_end ? 1 : 0);
-}
-
-
static
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
- return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
- escape,w_one,w_many,uni_plane);
+ return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
+ escape,w_one,w_many,uni_plane);
}
@@ -1406,8 +1248,8 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
- return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
- escape,w_one,w_many,NULL);
+ return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
+ escape,w_one,w_many,NULL);
}
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index fd6610b72b1..f5d40fb8ded 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1518,6 +1518,161 @@ MY_UNICASE_INFO *uni_plane[256]={
};
+
+/*
+** Compare str..str_end against the wildcard pattern wildstr..wildend; both are decoded with cs->cset->mb_wc.
+** This function is used in UTF8 and UCS2. If weights is non-NULL, each compared character is replaced by weights[plane][low byte].sort when that plane is present.
+**
+** 0 if matched
+** -1 if not matched and str was exhausted while looking for the text that follows a w_many
+** 1 if not matched otherwise (characters differ, str is longer than the pattern, or mb_wc returned <= 0)
+*/
+
+int my_wildcmp_unicode(CHARSET_INFO *cs,
+ const char *str,const char *str_end,
+ const char *wildstr,const char *wildend,
+ int escape, int w_one, int w_many,
+ MY_UNICASE_INFO **weights)
+{
+ int result= -1; /* Not found, using wildcards */
+ my_wc_t s_wc, w_wc;
+ int scan, plane;
+ int (*mb_wc)(struct charset_info_st *cs, my_wc_t *wc,
+ const unsigned char *s,const unsigned char *e);
+ mb_wc= cs->cset->mb_wc;
+
+ while (wildstr != wildend)
+ {
+ while (1)
+ {
+ if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+ (const uchar*)wildend)) <= 0)
+ return 1;
+
+ if (w_wc == (my_wc_t)escape)
+ {
+ wildstr+= scan;
+ if ((scan= mb_wc(cs,&w_wc, (const uchar*)wildstr,
+ (const uchar*)wildend)) <= 0)
+ return 1;
+ }
+
+ if (w_wc == (my_wc_t)w_many)
+ {
+ result= 1; /* Found w_many; NOTE(review): also reached when it followed escape, so an escaped w_many still acts as a wildcard */
+ break;
+ }
+
+ wildstr+= scan;
+ if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
+ (const uchar*)str_end)) <=0)
+ return 1;
+ str+= scan;
+
+ if (w_wc == (my_wc_t)w_one)
+ {
+ result= 1; /* w_one accepts any one character; NOTE(review): also reached when it followed escape */
+ }
+ else
+ {
+ if (weights)
+ {
+ plane=(s_wc>>8) & 0xFF;
+ s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
+ plane=(w_wc>>8) & 0xFF;
+ w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
+ }
+ if (s_wc != w_wc)
+ return 1; /* No match */
+ }
+ if (wildstr == wildend)
+ return (str != str_end); /* Pattern consumed: 0 (match) only if str is consumed too */
+ }
+
+
+ if (w_wc == (my_wc_t)w_many)
+ { /* Found w_many */
+
+ /* Skip the run of w_many and w_one that follows; each w_one consumes one character of str */
+ for ( ; wildstr != wildend ; )
+ {
+ if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+ (const uchar*)wildend)) <= 0)
+ return 1;
+
+ if (w_wc == (my_wc_t)w_many)
+ {
+ wildstr+= scan;
+ continue;
+ }
+
+ if (w_wc == (my_wc_t)w_one)
+ {
+ wildstr+= scan;
+ if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
+ (const uchar*)str_end)) <=0)
+ return 1;
+ str+= scan;
+ continue;
+ }
+ break; /* Not a wild character */
+ }
+
+ if (wildstr == wildend)
+ return 0; /* Ok if w_many is last */
+
+ if (str == str_end)
+ return -1;
+
+ if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+ (const uchar*)wildend)) <=0)
+ return 1;
+
+ if (w_wc == (my_wc_t)escape)
+ {
+ wildstr+= scan;
+ if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+ (const uchar*)wildend)) <=0)
+ return 1;
+ }
+
+ while (1)
+ {
+ /* Advance str to the next character equal to w_wc, the first pattern character after the wildcard run */
+ while (str != str_end)
+ {
+ if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
+ (const uchar*)str_end)) <=0)
+ return 1;
+ if (weights)
+ {
+ plane=(s_wc>>8) & 0xFF;
+ s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
+ plane=(w_wc>>8) & 0xFF;
+ w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
+ }
+
+ if (s_wc == w_wc)
+ break;
+ str+= scan;
+ }
+ if (str == str_end)
+ return -1;
+
+ result= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
+ escape, w_one, w_many,
+ weights);
+
+ if (result <= 0)
+ return result;
+
+ str+= scan;
+ }
+ }
+ }
+ return (str != str_end ? 1 : 0);
+}
+
#endif
@@ -1992,6 +2147,17 @@ static int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
return my_strncasecmp_utf8(cs, s, t, len);
}
+static
+int my_wildcmp_utf8(CHARSET_INFO *cs,
+ const char *str,const char *str_end,
+ const char *wildstr,const char *wildend,
+ int escape, int w_one, int w_many)
+{
+ return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
+ escape,w_one,w_many,uni_plane);
+}
+
+
static int my_strnxfrm_utf8(CHARSET_INFO *cs,
uchar *dst, uint dstlen,
const uchar *src, uint srclen)
@@ -2060,7 +2226,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncollsp_utf8,
my_strnxfrm_utf8,
my_like_range_mb,
- my_wildcmp_mb,
+ my_wildcmp_utf8,
my_strcasecmp_utf8,
my_instr_mb,
my_hash_sort_utf8