diff options
Diffstat (limited to 'strings/ctype-mb.c')
-rw-r--r-- | strings/ctype-mb.c | 199 |
1 files changed, 192 insertions, 7 deletions
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 76af3ef2cdf..8d864dfaad5 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -636,7 +636,7 @@ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end) DBUG_ASSERT(buflen > 0); do { - if ((str + buflen) < end) + if ((str + buflen) <= end) { /* Enough space for the characer */ memcpy(str, buf, buflen); @@ -683,7 +683,6 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, char *min_end= min_str + res_length; char *max_end= max_str + res_length; size_t maxcharlen= res_length / cs->mbmaxlen; - my_bool have_contractions= my_uca_have_contractions(cs); for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--) { @@ -751,8 +750,8 @@ fill_max_and_min: 'ab\min\min\min\min' and 'ab\max\max\max\max'. */ - if (have_contractions && ptr + 1 < end && - my_uca_can_be_contraction_head(cs, (uchar) *ptr)) + if (contraction_flags && ptr + 1 < end && + contraction_flags[(uchar) *ptr]) { /* Ptr[0] is a contraction head. */ @@ -774,8 +773,8 @@ fill_max_and_min: is not a contraction, then we put only ptr[0], and continue with ptr[1] on the next loop. */ - if (my_uca_can_be_contraction_tail(cs, (uchar) ptr[1]) && - my_uca_contraction2_weight(cs, (uchar) ptr[0], (uchar) ptr[1])) + if (contraction_flags[(uchar) ptr[1]] && + cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40]) { /* Contraction found */ if (maxcharlen == 1 || min_str + 1 >= min_end) @@ -801,6 +800,192 @@ fill_max_and_min: } +/** + Calculate min_str and max_str that ranges a LIKE string. + Generic function, currently used for ucs2, utf16, utf32, + but should be suitable for any other character sets with + cs->min_sort_char and cs->max_sort_char represented in + Unicode code points. 
+ + @param cs Character set and collation pointer + @param ptr Pointer to LIKE pattern. + @param ptr_length Length of LIKE pattern. + @param escape Escape character pattern, typically '\'. + @param w_one 'One character' pattern, typically '_'. + @param w_many 'Many characters' pattern, typically '%'. + @param res_length Length of min_str and max_str. + + @param[out] min_str Smallest string that ranges LIKE. + @param[out] max_str Largest string that ranges LIKE. + @param[out] min_length Length of min_str + @param[out] max_length Length of max_str + + @return Optimization status. + @retval FALSE if LIKE pattern can be optimized + @retval TRUE if LIKE can't be optimized (returned when the pattern contains an ill-formed sequence; min_length and max_length are then left unset). +*/ +my_bool +my_like_range_generic(CHARSET_INFO *cs, + const char *ptr, size_t ptr_length, + pbool escape, pbool w_one, pbool w_many, + size_t res_length, + char *min_str,char *max_str, + size_t *min_length,size_t *max_length) +{ + const char *end= ptr + ptr_length; + const char *min_org= min_str; + const char *max_org= max_str; + char *min_end= min_str + res_length; + char *max_end= max_str + res_length; + size_t charlen= res_length / cs->mbmaxlen; /* Character budget: number of mbmaxlen-byte characters that fit into res_length */ + size_t res_length_diff; + my_bool have_contractions= my_cs_have_contractions(cs); + + for ( ; charlen > 0; charlen--) + { + my_wc_t wc, wc2; + int res; + if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0) + { + if (res == MY_CS_ILSEQ) /* Bad sequence */ + return TRUE; /* min_length and max_length are not important */ + break; /* End of the string */ + } + ptr+= res; + + if (wc == (my_wc_t) escape) + { + if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0) + { + if (res == MY_CS_ILSEQ) + return TRUE; /* min_length and max_length are not important */ + /* + End of the string: Escape is the last character. + Put escape as a normal character. + We'll leave the loop on the next iteration. 
+ */ + } + else + ptr+= res; + + /* Put the escaped character (or the escape itself when it ends the pattern) to min_str and max_str */ + if ((res= cs->cset->wc_mb(cs, wc, + (uchar*) min_str, (uchar*) min_end)) <= 0) + goto pad_set_lengths; /* No space */ + min_str+= res; + + if ((res= cs->cset->wc_mb(cs, wc, + (uchar*) max_str, (uchar*) max_end)) <= 0) + goto pad_set_lengths; /* No space */ + max_str+= res; + continue; + } + else if (wc == (my_wc_t) w_one) + { + if ((res= cs->cset->wc_mb(cs, cs->min_sort_char, + (uchar*) min_str, (uchar*) min_end)) <= 0) + goto pad_set_lengths; + min_str+= res; + + if ((res= cs->cset->wc_mb(cs, cs->max_sort_char, + (uchar*) max_str, (uchar*) max_end)) <= 0) + goto pad_set_lengths; + max_str+= res; + continue; + } + else if (wc == (my_wc_t) w_many) + { + /* + Calculate length of keys: + a\min\min... is the smallest possible string + a\max\max... is the biggest possible string + */ + *min_length= ((cs->state & MY_CS_BINSORT) ? + (size_t) (min_str - min_org) : + res_length); + *max_length= res_length; + goto pad_min_max; + } + + if (have_contractions && + my_cs_can_be_contraction_head(cs, wc) && + (res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0) + { + uint16 *weight; + if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many)) + { + /* Contraction head followed by a wildcard */ + *min_length= *max_length= res_length; + goto pad_min_max; + } + + if (my_cs_can_be_contraction_tail(cs, wc2) && + (weight= my_cs_contraction2_weight(cs, wc, wc2)) && weight[0]) + { + /* Contraction found */ + if (charlen == 1) + { + /* Contraction does not fit into the result */ + *min_length= *max_length= res_length; + goto pad_min_max; + } + + ptr+= res; + charlen--; /* A contraction uses two characters of the budget */ + + /* Put contraction head */ + if ((res= cs->cset->wc_mb(cs, wc, + (uchar*) min_str, (uchar*) min_end)) <= 0) + goto pad_set_lengths; + min_str+= res; + + if ((res= cs->cset->wc_mb(cs, wc, + (uchar*) max_str, (uchar*) max_end)) <= 0) + goto pad_set_lengths; + max_str+= res; + wc= wc2; /* Prepare to put contraction tail */ + } + } + + 
+ /* Normal character, or contraction tail */ + if ((res= cs->cset->wc_mb(cs, wc, + (uchar*) min_str, (uchar*) min_end)) <= 0) + goto pad_set_lengths; + min_str+= res; + if ((res= cs->cset->wc_mb(cs, wc, + (uchar*) max_str, (uchar*) max_end)) <= 0) + goto pad_set_lengths; + max_str+= res; + } + +pad_set_lengths: + *min_length= (size_t) (min_str - min_org); + *max_length= (size_t) (max_str - max_org); + +pad_min_max: + /* + Fill up max_str and min_str to res_length. + fill() cannot set incomplete characters and + requires that "length" argument is divisible by mbminlen. + Make sure to call fill() with proper "length" argument. + */ + res_length_diff= res_length % cs->mbminlen; + cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff, + cs->min_sort_char); + cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff, + cs->max_sort_char); + + /* In case of incomplete characters set the remainder to 0x00's */ + if (res_length_diff) + { + /* Example: odd res_length for ucs2 */ + memset(min_end - res_length_diff, 0, res_length_diff); + memset(max_end - res_length_diff, 0, res_length_diff); + } + return FALSE; +} + + int my_wildcmp_mb_bin(CHARSET_INFO *cs, const char *str,const char *str_end, |