summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
Diffstat (limited to 'strings')
-rw-r--r--strings/ctype-bin.c25
-rw-r--r--strings/ctype-cp932.c14
-rw-r--r--strings/ctype-extra.c4
-rw-r--r--strings/ctype-mb.c78
-rw-r--r--strings/ctype-simple.c4
-rw-r--r--strings/ctype-uca.c11
-rw-r--r--strings/ctype-ucs2.c40
-rw-r--r--strings/ctype-utf8.c29
-rw-r--r--strings/strtod.c55
9 files changed, 221 insertions, 39 deletions
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index e9de0ade557..f9d29ca1739 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -271,6 +271,29 @@ static int my_wc_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
}
+void my_hash_sort_8bit_bin(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *key, uint len,ulong *nr1, ulong *nr2)
+{
+ const uchar *pos = key;
+
+ key+= len;
+
+ /*
+ Remove trailing spaces. We have to do this to be able to compare
+ 'A ' and 'A' as identical
+ */
+ while (key > pos && key[-1] == ' ')
+ key--;
+
+ for (; pos < (uchar*) key ; pos++)
+ {
+ nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
+ ((uint)*pos)) + (nr1[0] << 8);
+ nr2[0]+=3;
+ }
+}
+
+
void my_hash_sort_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, uint len,ulong *nr1, ulong *nr2)
{
@@ -471,7 +494,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
my_wildcmp_bin,
my_strcasecmp_bin,
my_instr_bin,
- my_hash_sort_bin,
+ my_hash_sort_8bit_bin,
my_propagate_simple
};
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index 0ece0ef1270..42325648037 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -250,9 +250,16 @@ static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a_end= a + a_length;
const uchar *b_end= b + b_length;
int res= my_strnncoll_cp932_internal(cs, &a, a_length, &b, b_length);
+
+#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
+ diff_if_only_endspace_difference= 0;
+#endif
+
if (!res && (a != a_end || b != b_end))
{
- int swap= 0;
+ int swap= 1;
+ if (diff_if_only_endspace_difference)
+ res= 1; /* Assume 'a' is bigger */
/*
Check the next not space character of the longer key. If it's < ' ',
then it's smaller than the other key.
@@ -263,11 +270,12 @@ static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)),
a_end= b_end;
a= b;
swap= -1; /* swap sign of result */
+ res= -res;
}
for (; a < a_end ; a++)
{
- if (*a != ' ')
- return ((int) *a - (int) ' ') ^ swap;
+ if (*a != (uchar) ' ')
+ return (*a < (uchar) ' ') ? -swap : swap;
}
}
return res;
diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c
index 2a7fcbd383e..38aa3a05adf 100644
--- a/strings/ctype-extra.c
+++ b/strings/ctype-extra.c
@@ -923,7 +923,7 @@ uint16 to_uni_ascii_general_ci[] = {
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
-0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x0000,
+0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
@@ -4604,7 +4604,7 @@ uint16 to_uni_ascii_bin[] = {
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
-0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x0000,
+0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 40cec669766..b370714e464 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -467,6 +467,13 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
key+= len;
+ /*
+ Remove trailing spaces. We have to do this to be able to compare
+ 'A ' and 'A' as identical
+ */
+ while (key > pos && key[-1] == ' ')
+ key--;
+
for (; pos < (uchar*) key ; pos++)
{
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
@@ -556,6 +563,8 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
char *min_end= min_str + res_length;
char *max_end= max_str + res_length;
uint maxcharlen= res_length / cs->mbmaxlen;
+ const char *contraction_flags= cs->contractions ?
+ ((const char*) cs->contractions) + 0x40*0x40 : NULL;
for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
{
@@ -564,6 +573,7 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
ptr++; /* Skip escape */
else if (*ptr == w_one || *ptr == w_many) /* '_' and '%' in SQL */
{
+fill_max_and_min:
/*
Calculate length of keys:
'a\0\0... is the smallest possible string when we have space expand
@@ -595,8 +605,74 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
*min_str++= *max_str++= *ptr++;
}
else
- *min_str++= *max_str++= *ptr++;
+ {
+ /*
+ Special case for collations with contractions.
+ For example, in Chezh, 'ch' is a separate letter
+ which is sorted between 'h' and 'i'.
+ If the pattern 'abc%', 'c' at the end can mean:
+ - letter 'c' itself,
+ - beginning of the contraction 'ch'.
+
+ If we simply return this LIKE range:
+
+ 'abc\min\min\min' and 'abc\max\max\max'
+ then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
+ will only find values starting from 'abc[^h]',
+ but won't find values starting from 'abch'.
+
+ We must ignore contraction heads followed by w_one or w_many.
+ ('Contraction head' means any letter which can be the first
+ letter in a contraction)
+
+ For example, for Czech 'abc%', we will return LIKE range,
+ which is equal to LIKE range for 'ab%':
+
+ 'ab\min\min\min\min' and 'ab\max\max\max\max'.
+
+ */
+ if (contraction_flags && ptr + 1 < end &&
+ contraction_flags[(uchar) *ptr])
+ {
+ /* Ptr[0] is a contraction head. */
+
+ if (ptr[1] == w_one || ptr[1] == w_many)
+ {
+ /* Contraction head followed by a wildcard, quit. */
+ goto fill_max_and_min;
+ }
+
+ /*
+ Some letters can be both contraction heads and contraction tails.
+ For example, in Danish 'aa' is a separate single letter which
+ is sorted after 'z'. So 'a' can be both head and tail.
+
+ If ptr[0]+ptr[1] is a contraction,
+ then put both letters together.
+
+ If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
+ is not a contraction, then we put only ptr[0],
+ and continue with ptr[1] on the next loop.
+ */
+ if (contraction_flags[(uchar) ptr[1]] &&
+ cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
+ {
+ /* Contraction found */
+ if (maxcharlen == 1 || min_str + 1 >= min_end)
+ {
+ /* Both contraction parts don't fit, quit */
+ goto fill_max_and_min;
+ }
+
+ /* Put contraction head */
+ *min_str++= *max_str++= *ptr++;
+ maxcharlen--;
+ }
+ }
+ /* Put contraction tail, or a single character */
+ *min_str++= *max_str++= *ptr++;
+ }
}
*min_length= *max_length = (uint) (min_str - min_org);
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index e57204f8d33..8b1b0d6790d 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -179,8 +179,8 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length,
}
for (end= a + a_length-length; a < end ; a++)
{
- if (*a != ' ')
- return (*a < ' ') ? -swap : swap;
+ if (map[*a] != ' ')
+ return (map[*a] < ' ') ? -swap : swap;
}
}
return res;
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 4dbda0b9239..81fb9ee1970 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -7937,10 +7937,16 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
/* Now process contractions */
if (ncontractions)
{
- uint size= 0x40*0x40*sizeof(uint16); /* 8K, for basic latin letter only */
+ /*
+ 8K for weights for basic latin letter pairs,
+ plus 256 bytes for "is contraction part" flags.
+ */
+ uint size= 0x40*0x40*sizeof(uint16) + 256;
+ char *contraction_flags;
if (!(cs->contractions= (uint16*) (*alloc)(size)))
return 1;
bzero((void*)cs->contractions, size);
+ contraction_flags= ((char*) cs->contractions) + 0x40*0x40;
for (i=0; i < rc; i++)
{
if (rule[i].curr[1])
@@ -7966,6 +7972,9 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
/* Copy base weight applying primary difference */
cs->contractions[offsc]= offsb[0] + rule[i].diff[0];
+ /* Mark both letters as "is contraction part */
+ contraction_flags[rule[i].curr[0]]= 1;
+ contraction_flags[rule[i].curr[1]]= 1;
}
}
}
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 6b1ba3c1ef6..b5353c55e4c 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1484,7 +1484,10 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *pos = key;
key+= len;
-
+
+ while (key > pos+1 && key[-1] == ' ' && key[-2] == '\0')
+ key-= 2;
+
for (; pos < (uchar*) key ; pos++)
{
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
@@ -1521,6 +1524,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
char *min_org=min_str;
char *min_end=min_str+res_length;
uint charlen= res_length / cs->mbmaxlen;
+ const char *contraction_flags= cs->contractions ?
+ ((const char*) cs->contractions) + 0x40*0x40 : NULL;
for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
; ptr+=2, charlen--)
@@ -1542,6 +1547,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
}
if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */
{
+fill_max_and_min:
/*
Calculate length of keys:
'a\0\0... is the smallest possible string when we have space expand
@@ -1558,6 +1564,38 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
} while (min_str + 1 < min_end);
return 0;
}
+
+ if (contraction_flags && ptr + 3 < end &&
+ ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]])
+ {
+ /* Contraction head found */
+ if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
+ {
+ /* Contraction head followed by a wildcard, quit */
+ goto fill_max_and_min;
+ }
+
+ /*
+ Check if the second letter can be contraction part,
+ and if two letters really produce a contraction.
+ */
+ if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] &&
+ cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40])
+ {
+ /* Contraction found */
+ if (charlen == 1 || min_str + 2 >= min_end)
+ {
+ /* Full contraction doesn't fit, quit */
+ goto fill_max_and_min;
+ }
+
+ /* Put contraction head */
+ *min_str++= *max_str++= *ptr++;
+ *min_str++= *max_str++= *ptr++;
+ charlen--;
+ }
+ }
+ /* Put contraction tail, or a single character */
*min_str++= *max_str++ = ptr[0];
*min_str++= *max_str++ = ptr[1];
}
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 387ce16a43d..4682868562f 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2802,16 +2802,19 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs,
static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
const uchar *s, uint slen,
const uchar *t, uint tlen,
- my_bool diff_if_only_endspace_difference
- __attribute__((unused)))
+ my_bool diff_if_only_endspace_difference)
{
- int s_res,t_res;
- my_wc_t s_wc,t_wc;
- const uchar *se= s+slen;
- const uchar *te= t+tlen;
- int save_diff = 0;
+ int s_res, t_res, res;
+ my_wc_t s_wc, t_wc;
+ const uchar *se= s + slen;
+ const uchar *te= t + tlen;
+ int save_diff= 0;
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
-
+
+#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
+ diff_if_only_endspace_difference= 0;
+#endif
+
while ( s < se && t < te )
{
int plane;
@@ -2843,16 +2846,20 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
slen= se-s;
tlen= te-t;
+ res= 0;
if (slen != tlen)
{
- int swap= 0;
+ int swap= 1;
+ if (diff_if_only_endspace_difference)
+ res= 1; /* Assume 'a' is bigger */
if (slen < tlen)
{
slen= tlen;
s= t;
se= te;
swap= -1;
+ res= -res;
}
/*
This following loop uses the fact that in UTF-8
@@ -2866,8 +2873,8 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
*/
for ( ; s < se; s++)
{
- if (*s != ' ')
- return ((int)*s - (int) ' ') ^ swap;
+ if (*s != (uchar) ' ')
+ return (*s < (uchar) ' ') ? -swap : swap;
}
}
return save_diff;
diff --git a/strings/strtod.c b/strings/strtod.c
index 7196cafb2c9..5fe59d10bd2 100644
--- a/strings/strtod.c
+++ b/strings/strtod.c
@@ -31,13 +31,40 @@
#define MAX_DBL_EXP 308
#define MAX_RESULT_FOR_MAX_EXP 1.7976931348623157
-static double scaler10[] = {
- 1.0, 1e10, 1e20, 1e30, 1e40, 1e50, 1e60, 1e70, 1e80, 1e90
-};
-static double scaler1[] = {
- 1.0, 10.0, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9
-};
+const double log_10[] = {
+ 1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009,
+ 1e010, 1e011, 1e012, 1e013, 1e014, 1e015, 1e016, 1e017, 1e018, 1e019,
+ 1e020, 1e021, 1e022, 1e023, 1e024, 1e025, 1e026, 1e027, 1e028, 1e029,
+ 1e030, 1e031, 1e032, 1e033, 1e034, 1e035, 1e036, 1e037, 1e038, 1e039,
+ 1e040, 1e041, 1e042, 1e043, 1e044, 1e045, 1e046, 1e047, 1e048, 1e049,
+ 1e050, 1e051, 1e052, 1e053, 1e054, 1e055, 1e056, 1e057, 1e058, 1e059,
+ 1e060, 1e061, 1e062, 1e063, 1e064, 1e065, 1e066, 1e067, 1e068, 1e069,
+ 1e070, 1e071, 1e072, 1e073, 1e074, 1e075, 1e076, 1e077, 1e078, 1e079,
+ 1e080, 1e081, 1e082, 1e083, 1e084, 1e085, 1e086, 1e087, 1e088, 1e089,
+ 1e090, 1e091, 1e092, 1e093, 1e094, 1e095, 1e096, 1e097, 1e098, 1e099,
+ 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109,
+ 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119,
+ 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129,
+ 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139,
+ 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149,
+ 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159,
+ 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169,
+ 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179,
+ 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189,
+ 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199,
+ 1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209,
+ 1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219,
+ 1e220, 1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229,
+ 1e230, 1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239,
+ 1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249,
+ 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259,
+ 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269,
+ 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279,
+ 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289,
+ 1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299,
+ 1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308
+};
/*
Convert string to double (string doesn't have to be null terminated)
@@ -57,7 +84,7 @@ double my_strtod(const char *str, char **end_ptr, int *error)
{
double result= 0.0;
uint negative= 0, ndigits, dec_digits= 0, neg_exp= 0;
- int exponent= 0, digits_after_dec_point= 0, tmp_exp;
+ int exponent= 0, digits_after_dec_point= 0, tmp_exp, step;
const char *old_str, *end= *end_ptr, *start_of_number;
char next_char;
my_bool overflow=0;
@@ -179,16 +206,10 @@ double my_strtod(const char *str, char **end_ptr, int *error)
exponent= -exponent;
neg_exp= 1; /* neg_exp was 0 before */
}
- while (exponent >= 100)
- {
- result= neg_exp ? result/1.0e100 : result*1.0e100;
- exponent-= 100;
- }
- scaler= scaler10[exponent/10]*scaler1[exponent%10];
- if (neg_exp)
- result/= scaler;
- else
- result*= scaler;
+ step= array_elements(log_10) - 1;
+ for (; exponent > step; exponent-= step)
+ result= neg_exp ? result / log_10[step] : result * log_10[step];
+ result= neg_exp ? result / log_10[exponent] : result * log_10[exponent];
}
done: