diff options
author | unknown <svoj@mysql.com/june.mysql.com> | 2007-06-27 18:10:19 +0500 |
---|---|---|
committer | unknown <svoj@mysql.com/june.mysql.com> | 2007-06-27 18:10:19 +0500 |
commit | 030d98d3971aaabbbeb37ed641629944a97186f5 (patch) | |
tree | 8521349d513536e124d936118690ca0abb5320a9 | |
parent | b3b8d5165d76de270532120dd8ce2a216308c0bd (diff) | |
download | mariadb-git-030d98d3971aaabbbeb37ed641629944a97186f5.tar.gz |
BUG#29299 - repeatable myisam fulltext index corruption
Fulltext index may get corrupted by certain gbk characters.
The problem was that, when skipping leading non-true-word-characters,
we assumed that these characters are always 1 byte long. This is not
the case with the gbk character set, since non-true-word-characters may
be 2 bytes long.
Affects 5.0 only.
myisam/ft_parser.c:
Leading non-true-word-characters may also be multi-byte (e.g. in
the gbk character set).
mysql-test/r/fulltext2.result:
A test case for BUG#29299.
mysql-test/t/fulltext2.test:
A test case for BUG#29299.
-rw-r--r-- | myisam/ft_parser.c | 6 | ||||
-rw-r--r-- | mysql-test/r/fulltext2.result | 12 | ||||
-rw-r--r-- | mysql-test/t/fulltext2.test | 12 |
3 files changed, 28 insertions, 2 deletions
diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c index 6c79f9249cf..6d68542e4e2 100644 --- a/myisam/ft_parser.c +++ b/myisam/ft_parser.c @@ -111,7 +111,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, while (doc<end) { - for (;doc<end;doc++) + for (; doc < end; doc+= mbl) { if (true_word_char(cs,*doc)) break; if (*doc == FTB_RQUOT && param->quot) @@ -120,6 +120,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, *start=doc+1; return 3; /* FTB_RBR */ } + mbl= my_mbcharlen(cs, *(uchar *)doc); if (!param->quot) { if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT) @@ -187,10 +188,11 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end, do { - for (;; doc++) + for (;; doc+= mbl) { if (doc >= end) DBUG_RETURN(0); if (true_word_char(cs, *doc)) break; + mbl= my_mbcharlen(cs, *(uchar *)doc); } mwc= length= 0; diff --git a/mysql-test/r/fulltext2.result b/mysql-test/r/fulltext2.result index f6a4b20bc22..f6bb4e4965a 100644 --- a/mysql-test/r/fulltext2.result +++ b/mysql-test/r/fulltext2.result @@ -241,3 +241,15 @@ select * from t1 where match a against('ab c' in boolean mode); a drop table t1; set names latin1; +CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a)); +SET NAMES utf8; +INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161); +SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE); +HEX(a) +BEF361616197C22061616161 +DELETE FROM t1 LIMIT 1; +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SET NAMES latin1; +DROP TABLE t1; diff --git a/mysql-test/t/fulltext2.test b/mysql-test/t/fulltext2.test index fd97f795534..99209dc8543 100644 --- a/mysql-test/t/fulltext2.test +++ b/mysql-test/t/fulltext2.test @@ -220,4 +220,16 @@ select * from t1 where match a against('ab c' in boolean mode); drop table t1; set names latin1; +# +# BUG#29299 - repeatable myisam fulltext index corruption +# +CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, 
FULLTEXT(a)); +SET NAMES utf8; +INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161); +SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE); +DELETE FROM t1 LIMIT 1; +CHECK TABLE t1; +SET NAMES latin1; +DROP TABLE t1; + # End of 4.1 tests |