diff options
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-bin.c | 25 | ||||
-rw-r--r-- | strings/ctype-cp932.c | 14 | ||||
-rw-r--r-- | strings/ctype-extra.c | 4 | ||||
-rw-r--r-- | strings/ctype-mb.c | 78 | ||||
-rw-r--r-- | strings/ctype-simple.c | 4 | ||||
-rw-r--r-- | strings/ctype-uca.c | 11 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 40 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 29 | ||||
-rw-r--r-- | strings/strtod.c | 55 |
9 files changed, 221 insertions, 39 deletions
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index e9de0ade557..f9d29ca1739 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -271,6 +271,29 @@ static int my_wc_mb_bin(CHARSET_INFO *cs __attribute__((unused)), } +void my_hash_sort_8bit_bin(CHARSET_INFO *cs __attribute__((unused)), + const uchar *key, uint len,ulong *nr1, ulong *nr2) +{ + const uchar *pos = key; + + key+= len; + + /* + Remove trailing spaces. We have to do this to be able to compare + 'A ' and 'A' as identical + */ + while (key > pos && key[-1] == ' ') + key--; + + for (; pos < (uchar*) key ; pos++) + { + nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * + ((uint)*pos)) + (nr1[0] << 8); + nr2[0]+=3; + } +} + + void my_hash_sort_bin(CHARSET_INFO *cs __attribute__((unused)), const uchar *key, uint len,ulong *nr1, ulong *nr2) { @@ -471,7 +494,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler = my_wildcmp_bin, my_strcasecmp_bin, my_instr_bin, - my_hash_sort_bin, + my_hash_sort_8bit_bin, my_propagate_simple }; diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c index 0ece0ef1270..42325648037 100644 --- a/strings/ctype-cp932.c +++ b/strings/ctype-cp932.c @@ -250,9 +250,16 @@ static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)), const uchar *a_end= a + a_length; const uchar *b_end= b + b_length; int res= my_strnncoll_cp932_internal(cs, &a, a_length, &b, b_length); + +#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE + diff_if_only_endspace_difference= 0; +#endif + if (!res && (a != a_end || b != b_end)) { - int swap= 0; + int swap= 1; + if (diff_if_only_endspace_difference) + res= 1; /* Assume 'a' is bigger */ /* Check the next not space character of the longer key. If it's < ' ', then it's smaller than the other key. 
@@ -263,11 +270,12 @@ static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)), a_end= b_end; a= b; swap= -1; /* swap sign of result */ + res= -res; } for (; a < a_end ; a++) { - if (*a != ' ') - return ((int) *a - (int) ' ') ^ swap; + if (*a != (uchar) ' ') + return (*a < (uchar) ' ') ? -swap : swap; } } return res; diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c index 2a7fcbd383e..38aa3a05adf 100644 --- a/strings/ctype-extra.c +++ b/strings/ctype-extra.c @@ -923,7 +923,7 @@ uint16 to_uni_ascii_general_ci[] = { 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067, 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F, 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077, -0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x0000, +0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, @@ -4604,7 +4604,7 @@ uint16 to_uni_ascii_bin[] = { 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067, 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F, 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077, -0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x0000, +0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 40cec669766..b370714e464 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -467,6 +467,13 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), key+= len; + /* + Remove trailing spaces. 
We have to do this to be able to compare + 'A ' and 'A' as identical + */ + while (key > pos && key[-1] == ' ') + key--; + for (; pos < (uchar*) key ; pos++) { nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * @@ -556,6 +563,8 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, char *min_end= min_str + res_length; char *max_end= max_str + res_length; uint maxcharlen= res_length / cs->mbmaxlen; + const char *contraction_flags= cs->contractions ? + ((const char*) cs->contractions) + 0x40*0x40 : NULL; for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--) { @@ -564,6 +573,7 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, ptr++; /* Skip escape */ else if (*ptr == w_one || *ptr == w_many) /* '_' and '%' in SQL */ { +fill_max_and_min: /* Calculate length of keys: 'a\0\0... is the smallest possible string when we have space expand @@ -595,8 +605,74 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, *min_str++= *max_str++= *ptr++; } else - *min_str++= *max_str++= *ptr++; + { + /* + Special case for collations with contractions. + For example, in Czech, 'ch' is a separate letter + which is sorted between 'h' and 'i'. + In the pattern 'abc%', 'c' at the end can mean: + - letter 'c' itself, + - beginning of the contraction 'ch'. + + If we simply return this LIKE range: + + 'abc\min\min\min' and 'abc\max\max\max' + then this query: SELECT * FROM t1 WHERE a LIKE 'abc%' + will only find values starting from 'abc[^h]', + but won't find values starting from 'abch'. + + We must ignore contraction heads followed by w_one or w_many. + ('Contraction head' means any letter which can be the first + letter in a contraction) + + For example, for Czech 'abc%', we will return LIKE range, + which is equal to LIKE range for 'ab%': + + 'ab\min\min\min\min' and 'ab\max\max\max\max'. + + */ + if (contraction_flags && ptr + 1 < end && + contraction_flags[(uchar) *ptr]) + { + /* Ptr[0] is a contraction head. 
*/ + + if (ptr[1] == w_one || ptr[1] == w_many) + { + /* Contraction head followed by a wildcard, quit. */ + goto fill_max_and_min; + } + + /* + Some letters can be both contraction heads and contraction tails. + For example, in Danish 'aa' is a separate single letter which + is sorted after 'z'. So 'a' can be both head and tail. + + If ptr[0]+ptr[1] is a contraction, + then put both letters together. + + If ptr[1] can be a contraction part, but ptr[0]+ptr[1] + is not a contraction, then we put only ptr[0], + and continue with ptr[1] on the next loop. + */ + if (contraction_flags[(uchar) ptr[1]] && + cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40]) + { + /* Contraction found */ + if (maxcharlen == 1 || min_str + 1 >= min_end) + { + /* Both contraction parts don't fit, quit */ + goto fill_max_and_min; + } + + /* Put contraction head */ + *min_str++= *max_str++= *ptr++; + maxcharlen--; + } + } + /* Put contraction tail, or a single character */ + *min_str++= *max_str++= *ptr++; + } } *min_length= *max_length = (uint) (min_str - min_org); diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index e57204f8d33..8b1b0d6790d 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -179,8 +179,8 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length, } for (end= a + a_length-length; a < end ; a++) { - if (*a != ' ') - return (*a < ' ') ? -swap : swap; + if (map[*a] != ' ') + return (map[*a] < ' ') ? -swap : swap; } } return res; diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 4dbda0b9239..81fb9ee1970 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -7937,10 +7937,16 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) /* Now process contractions */ if (ncontractions) { - uint size= 0x40*0x40*sizeof(uint16); /* 8K, for basic latin letter only */ + /* + 8K for weights for basic latin letter pairs, + plus 256 bytes for "is contraction part" flags. 
+ */ + uint size= 0x40*0x40*sizeof(uint16) + 256; + char *contraction_flags; if (!(cs->contractions= (uint16*) (*alloc)(size))) return 1; bzero((void*)cs->contractions, size); + contraction_flags= ((char*) cs->contractions) + 0x40*0x40; for (i=0; i < rc; i++) { if (rule[i].curr[1]) @@ -7966,6 +7972,9 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) /* Copy base weight applying primary difference */ cs->contractions[offsc]= offsb[0] + rule[i].diff[0]; + /* Mark both letters as "is contraction part" */ + contraction_flags[rule[i].curr[0]]= 1; + contraction_flags[rule[i].curr[1]]= 1; } } } diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 6b1ba3c1ef6..b5353c55e4c 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1484,7 +1484,10 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), const uchar *pos = key; key+= len; - + + while (key > pos+1 && key[-1] == ' ' && key[-2] == '\0') + key-= 2; + for (; pos < (uchar*) key ; pos++) { nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * @@ -1521,6 +1524,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, char *min_org=min_str; char *min_end=min_str+res_length; uint charlen= res_length / cs->mbmaxlen; + const char *contraction_flags= cs->contractions ? + ((const char*) cs->contractions) + 0x40*0x40 : NULL; for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0 ; ptr+=2, charlen--) @@ -1542,6 +1547,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, } if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */ { +fill_max_and_min: /* Calculate length of keys: 'a\0\0... 
is the smallest possible string when we have space expand @@ -1558,6 +1564,38 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, } while (min_str + 1 < min_end); return 0; } + + if (contraction_flags && ptr + 3 < end && + ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]]) + { + /* Contraction head found */ + if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many)) + { + /* Contraction head followed by a wildcard, quit */ + goto fill_max_and_min; + } + + /* + Check if the second letter can be contraction part, + and if two letters really produce a contraction. + */ + if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] && + cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40]) + { + /* Contraction found */ + if (charlen == 1 || min_str + 2 >= min_end) + { + /* Full contraction doesn't fit, quit */ + goto fill_max_and_min; + } + + /* Put contraction head */ + *min_str++= *max_str++= *ptr++; + *min_str++= *max_str++= *ptr++; + charlen--; + } + } + /* Put contraction tail, or a single character */ *min_str++= *max_str++ = ptr[0]; *min_str++= *max_str++ = ptr[1]; } diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 387ce16a43d..4682868562f 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2802,16 +2802,19 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, const uchar *s, uint slen, const uchar *t, uint tlen, - my_bool diff_if_only_endspace_difference - __attribute__((unused))) + my_bool diff_if_only_endspace_difference) { - int s_res,t_res; - my_wc_t s_wc,t_wc; - const uchar *se= s+slen; - const uchar *te= t+tlen; - int save_diff = 0; + int s_res, t_res, res; + my_wc_t s_wc, t_wc; + const uchar *se= s + slen; + const uchar *te= t + tlen; + int save_diff= 0; MY_UNICASE_INFO **uni_plane= cs->caseinfo; - + +#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE + diff_if_only_endspace_difference= 0; +#endif + while ( s < se && t < te ) { int plane; @@ -2843,16 +2846,20 @@ static int 
my_strnncollsp_utf8_cs(CHARSET_INFO *cs, slen= se-s; tlen= te-t; + res= 0; if (slen != tlen) { - int swap= 0; + int swap= 1; + if (diff_if_only_endspace_difference) + res= 1; /* Assume 'a' is bigger */ if (slen < tlen) { slen= tlen; s= t; se= te; swap= -1; + res= -res; } /* This following loop uses the fact that in UTF-8 @@ -2866,8 +2873,8 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, */ for ( ; s < se; s++) { - if (*s != ' ') - return ((int)*s - (int) ' ') ^ swap; + if (*s != (uchar) ' ') + return (*s < (uchar) ' ') ? -swap : swap; } } return save_diff; diff --git a/strings/strtod.c b/strings/strtod.c index 7196cafb2c9..5fe59d10bd2 100644 --- a/strings/strtod.c +++ b/strings/strtod.c @@ -31,13 +31,40 @@ #define MAX_DBL_EXP 308 #define MAX_RESULT_FOR_MAX_EXP 1.7976931348623157 -static double scaler10[] = { - 1.0, 1e10, 1e20, 1e30, 1e40, 1e50, 1e60, 1e70, 1e80, 1e90 -}; -static double scaler1[] = { - 1.0, 10.0, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9 -}; +const double log_10[] = { + 1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009, + 1e010, 1e011, 1e012, 1e013, 1e014, 1e015, 1e016, 1e017, 1e018, 1e019, + 1e020, 1e021, 1e022, 1e023, 1e024, 1e025, 1e026, 1e027, 1e028, 1e029, + 1e030, 1e031, 1e032, 1e033, 1e034, 1e035, 1e036, 1e037, 1e038, 1e039, + 1e040, 1e041, 1e042, 1e043, 1e044, 1e045, 1e046, 1e047, 1e048, 1e049, + 1e050, 1e051, 1e052, 1e053, 1e054, 1e055, 1e056, 1e057, 1e058, 1e059, + 1e060, 1e061, 1e062, 1e063, 1e064, 1e065, 1e066, 1e067, 1e068, 1e069, + 1e070, 1e071, 1e072, 1e073, 1e074, 1e075, 1e076, 1e077, 1e078, 1e079, + 1e080, 1e081, 1e082, 1e083, 1e084, 1e085, 1e086, 1e087, 1e088, 1e089, + 1e090, 1e091, 1e092, 1e093, 1e094, 1e095, 1e096, 1e097, 1e098, 1e099, + 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, + 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, + 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, + 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 
1e138, 1e139, + 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, + 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, + 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169, + 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179, + 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189, + 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199, + 1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209, + 1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, + 1e220, 1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, + 1e230, 1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, + 1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, + 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259, + 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269, + 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279, + 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289, + 1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299, + 1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308 +}; /* Convert string to double (string doesn't have to be null terminated) @@ -57,7 +84,7 @@ double my_strtod(const char *str, char **end_ptr, int *error) { double result= 0.0; uint negative= 0, ndigits, dec_digits= 0, neg_exp= 0; - int exponent= 0, digits_after_dec_point= 0, tmp_exp; + int exponent= 0, digits_after_dec_point= 0, tmp_exp, step; const char *old_str, *end= *end_ptr, *start_of_number; char next_char; my_bool overflow=0; @@ -179,16 +206,10 @@ double my_strtod(const char *str, char **end_ptr, int *error) exponent= -exponent; neg_exp= 1; /* neg_exp was 0 before */ } - while (exponent >= 100) - { - result= neg_exp ? 
result/1.0e100 : result*1.0e100; - exponent-= 100; - } - scaler= scaler10[exponent/10]*scaler1[exponent%10]; - if (neg_exp) - result/= scaler; - else - result*= scaler; + step= array_elements(log_10) - 1; + for (; exponent > step; exponent-= step) + result= neg_exp ? result / log_10[step] : result * log_10[step]; + result= neg_exp ? result / log_10[exponent] : result * log_10[exponent]; } done: |