diff options
author | Alexander Barkov <bar@mariadb.com> | 2018-10-15 13:22:18 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2018-10-15 13:22:18 +0400 |
commit | 34f8a4071e2a77e3263f0fbf2adf1c9e3f8464b1 (patch) | |
tree | 1cf8f34a2bdac4e293da332f3e0aba863238b51e /strings | |
parent | ae3fe14c17b4982fd576f65dccc5aba2e630cb31 (diff) | |
download | mariadb-git-34f8a4071e2a77e3263f0fbf2adf1c9e3f8464b1.tar.gz |
MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx
Synchronizing sources in:
- my_wildcmp_uca_impl() handling utf8_unicode_ci
- my_wildcmp_unicode_impl() handling utf8_general_ci
The latter already has a fix for a similar MySQL bug in utf8_general_ci:
Bug#11754 SET NAMES utf8 followed by SELECT "A\\" LIKE "A\\" returns 0
So the fix is now propagated to utf8_unicode_ci.
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-uca.c | 116 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 46 |
2 files changed, 80 insertions, 82 deletions
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 2351ee9d932..4ccf8170c3e 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -21069,11 +21069,11 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs, const char *wildstr,const char *wildend, int escape, int w_one, int w_many, int recurse_level) { - int result= -1; /* Not found, using wildcards */ + int result= -1; /* Not found, using wildcards */ my_wc_t s_wc, w_wc; int scan; my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - + if (my_string_stack_guard && my_string_stack_guard(recurse_level)) return 1; while (wildstr != wildend) @@ -21082,119 +21082,121 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs, { my_bool escaped= 0; if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; + (const uchar*)wildend)) <= 0) + return 1; - if (w_wc == (my_wc_t)w_many) + if (w_wc == (my_wc_t) w_many) { - result= 1; /* Found an anchor char */ + result= 1; /* Found an anchor char */ break; } wildstr+= scan; - if (w_wc == (my_wc_t)escape) + if (w_wc == (my_wc_t) escape && wildstr < wildend) { if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) + (const uchar*)wildend)) <= 0) return 1; wildstr+= scan; escaped= 1; } - + if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) + (const uchar*)str_end)) <= 0) return 1; str+= scan; - - if (!escaped && w_wc == (my_wc_t)w_one) + + if (!escaped && w_wc == (my_wc_t) w_one) { - result= 1; /* Found an anchor char */ + result= 1; /* Found an anchor char */ } else { if (my_uca_charcmp(cs,s_wc,w_wc)) - return 1; + return 1; /* No match */ } if (wildstr == wildend) - return (str != str_end); /* Match if both are at end */ + return (str != str_end); /* Match if both are at end */ } - - - if (w_wc == (my_wc_t)w_many) - { /* Found w_many */ - + + if (w_wc == (my_wc_t) w_many) + { /* Found w_many */ /* Remove any '%' and '_' from the wild search string */ for ( ; wildstr != wildend ; ) { if ((scan= mb_wc(cs, 
&w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) + (const uchar*)wildend)) <= 0) return 1; - - if (w_wc == (my_wc_t)w_many) - { - wildstr+= scan; - continue; - } - - if (w_wc == (my_wc_t)w_one) - { - wildstr+= scan; - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) + + if (w_wc == (my_wc_t) w_many) + { + wildstr+= scan; + continue; + } + + if (w_wc == (my_wc_t) w_one) + { + wildstr+= scan; + if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, + (const uchar*)str_end)) <= 0) return 1; str+= scan; - continue; - } - break; /* Not a wild character */ + continue; + } + break; /* Not a wild character */ } - + if (wildstr == wildend) - return 0; /* Ok if w_many is last */ - + return 0; /* Ok if w_many is last */ + if (str == str_end) - return -1; - + return -1; + if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) + (const uchar*)wildend)) <= 0) return 1; - - if (w_wc == (my_wc_t)escape) + wildstr+= scan; + + if (w_wc == (my_wc_t) escape) { - wildstr+= scan; - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; + if (wildstr < wildend) + { + if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, + (const uchar*)wildend)) <= 0) + return 1; + wildstr+= scan; + } } - + while (1) { /* Skip until the first character from wildstr is found */ while (str != str_end) { if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) + (const uchar*)str_end)) <= 0) return 1; - + if (!my_uca_charcmp(cs,s_wc,w_wc)) break; str+= scan; } if (str == str_end) return -1; - + + str+= scan; result= my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, recurse_level+1); - + escape, w_one, w_many, + recurse_level + 1); if (result <= 0) return result; - - str+= scan; - } + } } } return (str != str_end ? 
1 : 0); } + int my_wildcmp_uca(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 7d7e61ce511..3cb832c5414 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -4400,9 +4400,7 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs, int result= -1; /* Not found, using wildcards */ my_wc_t s_wc, w_wc; int scan; - int (*mb_wc)(CHARSET_INFO *, my_wc_t *, - const uchar *, const uchar *); - mb_wc= cs->cset->mb_wc; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; if (my_string_stack_guard && my_string_stack_guard(recurse_level)) return 1; @@ -4430,12 +4428,12 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs, wildstr+= scan; escaped= 1; } - + if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, (const uchar*)str_end)) <= 0) return 1; str+= scan; - + if (!escaped && w_wc == (my_wc_t) w_one) { result= 1; /* Found an anchor char */ @@ -4453,86 +4451,84 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs, if (wildstr == wildend) return (str != str_end); /* Match if both are at end */ } - - + if (w_wc == (my_wc_t) w_many) { /* Found w_many */ - /* Remove any '%' and '_' from the wild search string */ for ( ; wildstr != wildend ; ) { if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <= 0) return 1; - - if (w_wc == (my_wc_t)w_many) + + if (w_wc == (my_wc_t) w_many) { wildstr+= scan; continue; } - - if (w_wc == (my_wc_t)w_one) + + if (w_wc == (my_wc_t) w_one) { wildstr+= scan; if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <=0) + (const uchar*)str_end)) <= 0) return 1; str+= scan; continue; } break; /* Not a wild character */ } - + if (wildstr == wildend) return 0; /* Ok if w_many is last */ - + if (str == str_end) return -1; - + if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <=0) + (const uchar*)wildend)) <= 0) return 1; wildstr+= scan; - - if (w_wc == (my_wc_t)escape) + + if (w_wc == (my_wc_t) escape) 
{ if (wildstr < wildend) { if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <=0) + (const uchar*)wildend)) <= 0) return 1; wildstr+= scan; } } - + while (1) { /* Skip until the first character from wildstr is found */ while (str != str_end) { if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <=0) + (const uchar*)str_end)) <= 0) return 1; if (weights) { my_tosort_unicode(weights, &s_wc, cs->state); my_tosort_unicode(weights, &w_wc, cs->state); } - + if (s_wc == w_wc) break; str+= scan; } if (str == str_end) return -1; - + str+= scan; result= my_wildcmp_unicode_impl(cs, str, str_end, wildstr, wildend, escape, w_one, w_many, weights, recurse_level + 1); if (result <= 0) return result; - } + } } } return (str != str_end ? 1 : 0); |