diff options
author | bar@mysql.com <> | 2004-08-19 15:15:10 +0500 |
---|---|---|
committer | bar@mysql.com <> | 2004-08-19 15:15:10 +0500 |
commit | 2496e85b84aad64a273dcb6ee45bb4a706c4b87d (patch) | |
tree | 01ac6df006d72fc8d39e0e1c185bc3c8bc11a7bb /strings | |
parent | 6b90806a4a5ee31ec9f5d9e59f1cd9e722dd1866 (diff) | |
download | mariadb-git-2496e85b84aad64a273dcb6ee45bb4a706c4b87d.tar.gz |
Bug#4521: unique key prefix interacts poorly with utf8.
Fix for binary collations for MyISAM and HEAP BTREE.
This patch also changes the trailing-space behaviour of
binary collations: binary collations now have the PAD SPACE
characteristic too, i.e. trailing spaces are ignored in comparisons.
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-bin.c | 108 | ||||
-rw-r--r-- | strings/ctype-mb.c | 57 |
2 files changed, 150 insertions, 15 deletions
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index cc83471f264..e759a5654f1 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -68,11 +68,22 @@ static uchar bin_char_array[] = +static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)), + const uchar *s, uint slen, + const uchar *t, uint tlen, + my_bool t_is_prefix) +{ + uint len=min(slen,tlen); + int cmp= memcmp(s,t,len); + return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen); +} + + /* Compare two strings. Result is sign(first_argument - second_argument) SYNOPSIS - my_strnncoll_binary() + my_strnncollsp_binary() cs Chararacter set s String to compare slen Length of 's' @@ -80,8 +91,9 @@ static uchar bin_char_array[] = tlen Length of 't' NOTE - This is used also when comparing with end space removal, as end space - is significant for binary strings + This function is used for real binary strings, i.e. for + BLOB, BINARY(N) and VARBINARY(N). + It does not ignore trailing spaces. RETURN < 0 s < t @@ -89,10 +101,18 @@ static uchar bin_char_array[] = > 0 s > t */ -static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)), - const uchar *s, uint slen, - const uchar *t, uint tlen, - my_bool t_is_prefix) +static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)), + const uchar *s, uint slen, + const uchar *t, uint tlen) +{ + return my_strnncoll_binary(cs,s,slen,t,tlen,0); +} + + +static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), + const uchar *s, uint slen, + const uchar *t, uint tlen, + my_bool t_is_prefix) { uint len=min(slen,tlen); int cmp= memcmp(s,t,len); @@ -100,11 +120,61 @@ static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)), } -static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)), - const uchar *s, uint slen, - const uchar *t, uint tlen) +/* + Compare two strings. 
Result is sign(first_argument - second_argument) + + SYNOPSIS + my_strnncollsp_8bit_bin() + cs Chararacter set + s String to compare + slen Length of 's' + t String to compare + tlen Length of 't' + + NOTE + This function is used for character strings with binary collations. + It ignores trailing spaces. + + RETURN + < 0 s < t + 0 s == t + > 0 s > t +*/ + +static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - return my_strnncoll_binary(cs,s,slen,t,tlen,0); + const uchar *end; + uint length; + + end= a + (length= min(a_length, b_length)); + while (a < end) + { + if (*a++ != *b++) + return ((int) a[-1] - (int) b[-1]); + } + if (a_length != b_length) + { + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a_length < b_length) + { + /* put shorter key in s */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return 0; } @@ -344,6 +414,20 @@ skip: MY_COLLATION_HANDLER my_collation_8bit_bin_handler = { NULL, /* init */ + my_strnncoll_8bit_bin, + my_strnncollsp_8bit_bin, + my_strnxfrm_bin, + my_like_range_simple, + my_wildcmp_bin, + my_strcasecmp_bin, + my_instr_bin, + my_hash_sort_bin +}; + + +static MY_COLLATION_HANDLER my_collation_binary_handler = +{ + NULL, /* init */ my_strnncoll_binary, my_strnncollsp_binary, my_strnxfrm_bin, @@ -407,5 +491,5 @@ CHARSET_INFO my_charset_bin = 0, /* min_sort_char */ 255, /* max_sort_char */ &my_charset_handler, - &my_collation_8bit_bin_handler + &my_collation_binary_handler }; diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 7b0dadcfa19..ecafa6356d5 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -360,11 +360,62 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), 
return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen); } + +/* + Compare two strings. + + SYNOPSIS + my_strnncollsp_mb_bin() + cs Chararacter set + s String to compare + slen Length of 's' + t String to compare + tlen Length of 't' + + NOTE + This function is used for character strings with binary collations. + It ignores trailing spaces. + + RETURN + A negative number if s < t + A positive number if s > t + 0 if strings are equal +*/ + static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - const uchar *s, uint slen, - const uchar *t, uint tlen) + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - return my_strnncoll_mb_bin(cs,s,slen,t,tlen,0); + const uchar *end; + uint length; + + end= a + (length= min(a_length, b_length)); + while (a < end) + { + if (*a++ != *b++) + return ((int) a[-1] - (int) b[-1]); + } + if (a_length != b_length) + { + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a_length < b_length) + { + /* put shorter key in s */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return 0; } |