diff options
author | unknown <bar@bar.intranet.mysql.r18.ru> | 2004-03-26 18:14:39 +0400 |
---|---|---|
committer | unknown <bar@bar.intranet.mysql.r18.ru> | 2004-03-26 18:14:39 +0400 |
commit | 63e1d22f8f46966c13d88a4f2e9acd7fa3e9c9b6 (patch) | |
tree | 0473522f9282da1daba81a8db274d031c52e0fe8 | |
parent | 403948cbb3a27da905269857616c60c8fc4675ba (diff) | |
download | mariadb-git-63e1d22f8f46966c13d88a4f2e9acd7fa3e9c9b6.tar.gz |
UTF8 now processes space as a PAD character correctly.
-rw-r--r-- | mysql-test/r/compare.result | 24 | ||||
-rw-r--r-- | mysql-test/r/ctype_utf8.result | 24 | ||||
-rw-r--r-- | mysql-test/t/compare.test | 17 | ||||
-rw-r--r-- | mysql-test/t/ctype_utf8.test | 17 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 96 |
5 files changed, 170 insertions, 8 deletions
diff --git a/mysql-test/r/compare.result b/mysql-test/r/compare.result index 3ad3465fac0..bf8a5106044 100644 --- a/mysql-test/r/compare.result +++ b/mysql-test/r/compare.result @@ -12,3 +12,27 @@ select * from t1; id 000000000001 drop table t1; +SELECT 'a' = 'a '; +'a' = 'a ' +1 +SELECT 'a\0' < 'a'; +'a\0' < 'a' +1 +SELECT 'a\0' < 'a '; +'a\0' < 'a ' +1 +SELECT 'a\t' < 'a'; +'a\t' < 'a' +1 +SELECT 'a\t' < 'a '; +'a\t' < 'a ' +1 +CREATE TABLE t1 (a char(10) not null); +INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a '); +SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1; +hex(a) STRCMP(a,'a') STRCMP(a,'a ') +61 0 0 +6100 -1 -1 +6109 -1 -1 +61 0 0 +DROP TABLE t1; diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 33a7e91fbeb..28af71b7681 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -63,6 +63,30 @@ select 'A' like 'a' collate utf8_bin; select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%'); _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%') 1 +SELECT 'a' = 'a '; +'a' = 'a ' +1 +SELECT 'a\0' < 'a'; +'a\0' < 'a' +1 +SELECT 'a\0' < 'a '; +'a\0' < 'a ' +1 +SELECT 'a\t' < 'a'; +'a\t' < 'a' +1 +SELECT 'a\t' < 'a '; +'a\t' < 'a ' +1 +CREATE TABLE t1 (a char(10) character set utf8 not null); +INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a '); +SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1; +hex(a) STRCMP(a,'a') STRCMP(a,'a ') +61 0 0 +6100 -1 -1 +6109 -1 -1 +61 0 0 +DROP TABLE t1; select insert('txs',2,1,'hi'),insert('is ',4,0,'a'),insert('txxxxt',2,4,'es'); insert('txs',2,1,'hi') insert('is ',4,0,'a') insert('txxxxt',2,4,'es') this is a test diff --git a/mysql-test/t/compare.test b/mysql-test/t/compare.test index e5a2e310866..b0cef48dd3f 100644 --- a/mysql-test/t/compare.test +++ b/mysql-test/t/compare.test @@ -13,3 +13,20 @@ select * from t1 where id=000000000001; delete from t1 where id=000000000002; select * from t1; drop table t1; + +# +# Check the following: 
+# "a" == "a " +# "a\0" < "a" +# "a\0" < "a " + +SELECT 'a' = 'a '; +SELECT 'a\0' < 'a'; +SELECT 'a\0' < 'a '; +SELECT 'a\t' < 'a'; +SELECT 'a\t' < 'a '; + +CREATE TABLE t1 (a char(10) not null); +INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a '); +SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1; +DROP TABLE t1; diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index 964ac84fbbe..4e68efeffc3 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -34,6 +34,23 @@ select 'A' like 'a' collate utf8_bin; select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%'); # +# Check the following: +# "a" == "a " +# "a\0" < "a" +# "a\0" < "a " + +SELECT 'a' = 'a '; +SELECT 'a\0' < 'a'; +SELECT 'a\0' < 'a '; +SELECT 'a\t' < 'a'; +SELECT 'a\t' < 'a '; + +CREATE TABLE t1 (a char(10) character set utf8 not null); +INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a '); +SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1; +DROP TABLE t1; + +# # Fix this, it should return 1: # #select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD091,_utf8 '%'); diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 886ecfbd0c9..82787f2b65f 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1837,18 +1837,98 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs, } + /* - TODO: Has to be fixed as strnncollsp in ctype-simple + Compare strings, discarding end space + + SYNOPSIS + my_strnncollsp_utf8() + cs character set handler + a First string to compare + a_length Length of 'a' + b Second string to compare + b_length Length of 'b' + + IMPLEMENTATION + If one string is shorter as the other, then we space extend the other + so that the strings have equal length. 
+ + This will ensure that the following things hold: + + "a" == "a " + "a\0" < "a" + "a\0" < "a " + + RETURN + < 0 a < b + = 0 a == b + > 0 a > b */ -static -int my_strnncollsp_utf8(CHARSET_INFO * cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) +static int my_strnncollsp_utf8(CHARSET_INFO *cs, + const uchar *s, uint slen, + const uchar *t, uint tlen) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_utf8(cs,s,slen,t,tlen); + int s_res,t_res; + my_wc_t s_wc,t_wc; + const uchar *se= s+slen; + const uchar *te= t+tlen; + + while ( s < se && t < te ) + { + int plane; + s_res=my_utf8_uni(cs,&s_wc, s, se); + t_res=my_utf8_uni(cs,&t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare by char value */ + return ((int)s[0]-(int)t[0]); + } + + plane=(s_wc>>8) & 0xFF; + s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; + plane=(t_wc>>8) & 0xFF; + t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc; + if ( s_wc != t_wc ) + { + return ((int) s_wc) - ((int) t_wc); + } + + s+=s_res; + t+=t_res; + } + + slen= se-s; + tlen= te-t; + + if (slen != tlen) + { + int swap= 0; + if (slen < tlen) + { + slen= tlen; + s= t; + se= te; + swap= -1; + } + /* + This following loop uses the fact that in UTF-8 + all multibyte characters are greater than space, + and all multibyte head characters are greater than + space. It means if we meet a character greater + than space, it always means that the longer string + is greater. So we can reuse the same loop from the + 8bit version, without having to process full multibute + sequences. + */ + for ( ; s < se; s++) + { + if (*s != ' ') + return ((int)*s - (int) ' ') ^ swap; + } + } + return 0; } |